/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *  Author: Avadis Tevanian, Jr., Michael Wayne Young
 *
 *  Resident memory management module.
 */
#include <libkern/OSAtomic.h>
#include <libkern/OSDebug.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/policy_internal.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/ledger.h>
#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>         /* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>
#include <mach_debug/zone_info.h>
#include <pexpert/pexpert.h>
#include <san/kasan.h>

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kdebug.h>
char    vm_page_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
char    vm_page_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
char    vm_page_non_speculative_pageable_states[VM_PAGE_Q_STATE_ARRAY_SIZE];
char    vm_page_active_or_inactive_states[VM_PAGE_Q_STATE_ARRAY_SIZE];

#if CONFIG_SECLUDED_MEMORY
struct vm_page_secluded_data vm_page_secluded;
#endif /* CONFIG_SECLUDED_MEMORY */

boolean_t   hibernate_cleaning_in_progress = FALSE;
boolean_t   vm_page_free_verify = TRUE;

uint32_t    vm_lopage_free_count = 0;
uint32_t    vm_lopage_free_limit = 0;
uint32_t    vm_lopage_lowater = 0;
boolean_t   vm_lopage_refill = FALSE;
boolean_t   vm_lopage_needed = FALSE;

lck_mtx_ext_t   vm_page_queue_lock_ext;
lck_mtx_ext_t   vm_page_queue_free_lock_ext;
lck_mtx_ext_t   vm_purgeable_queue_lock_ext;

int     speculative_age_index = 0;
int     speculative_steal_index = 0;
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];

__private_extern__ void vm_page_init_lck_grp(void);

static void     vm_page_free_prepare(vm_page_t page);
static vm_page_t    vm_page_grab_fictitious_common(ppnum_t phys_addr);

static void vm_tag_init(void);

uint64_t    vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
uint32_t    vm_packed_from_vm_pages_array_mask = VM_PACKED_FROM_VM_PAGES_ARRAY;
uint32_t    vm_packed_pointer_shift = VM_PACKED_POINTER_SHIFT;
/*
 *  Associated with each page of user-allocatable memory is a
 *  page structure.
 */

/*
 *  These variables record the values returned by vm_page_bootstrap,
 *  for debugging purposes.  The implementation of pmap_steal_memory
 *  and pmap_startup here also uses them internally.
 */

vm_offset_t virtual_space_start;
vm_offset_t virtual_space_end;
uint32_t    vm_page_pages;
/*
 *  The vm_page_lookup() routine, which provides for fast
 *  (virtual memory object, offset) to page lookup, employs
 *  the following hash table.  The vm_page_{insert,remove}
 *  routines install and remove associations in the table.
 *  [This table is often called the virtual-to-physical,
 *  or VP, table.]
 */
typedef struct {
    vm_page_packed_t page_list;
#if MACH_PAGE_HASH_STATS
    int     cur_count;      /* current count */
    int     hi_count;       /* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;


#define BUCKETS_PER_LOCK    16

vm_page_bucket_t *vm_page_buckets;      /* Array of buckets */
unsigned int    vm_page_bucket_count = 0;   /* How big is array? */
unsigned int    vm_page_hash_mask;      /* Mask for hash function */
unsigned int    vm_page_hash_shift;     /* Shift for hash function */
uint32_t    vm_page_bucket_hash;        /* Basic bucket hash */
unsigned int    vm_page_bucket_lock_count = 0;  /* How big is array of locks? */
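
/*
 * Note (explanatory, not in the original source): the hash buckets are not
 * locked individually.  With BUCKETS_PER_LOCK == 16, bucket i is guarded by
 * vm_page_bucket_locks[i / BUCKETS_PER_LOCK], so one spin lock covers a run
 * of 16 consecutive buckets.  For example (illustrative arithmetic only):
 *
 *  hash_id     = vm_page_hash(object, offset);     // e.g. 1234
 *  bucket      = &vm_page_buckets[1234];
 *  bucket_lock = &vm_page_bucket_locks[1234 / 16]; // lock #77
 *
 * This trades a little extra contention for a much smaller lock array
 * (vm_page_bucket_lock_count locks instead of vm_page_bucket_count).
 */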
#ifndef VM_TAG_ACTIVE_UPDATE
#error VM_TAG_ACTIVE_UPDATE
#endif
#ifndef VM_MAX_TAG_ZONES
#error VM_MAX_TAG_ZONES
#endif

boolean_t   vm_tag_active_update = VM_TAG_ACTIVE_UPDATE;
lck_spin_t  *vm_page_bucket_locks;
lck_spin_t  vm_objects_wired_lock;
lck_spin_t  vm_allocation_sites_lock;

vm_allocation_site_t        vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC + 1];
vm_allocation_site_t *      vm_allocation_sites[VM_MAX_TAG_VALUE];
#if VM_MAX_TAG_ZONES
vm_allocation_zone_total_t **   vm_allocation_zone_totals;
#endif /* VM_MAX_TAG_ZONES */

vm_tag_t vm_allocation_tag_highest;
#if VM_PAGE_BUCKETS_CHECK
boolean_t vm_page_buckets_check_ready = FALSE;
#if VM_PAGE_FAKE_BUCKETS
vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */
#if MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
    int i;
    int numbuckets = 0;
    int highsum = 0;
    int maxdepth = 0;

    for (i = 0; i < vm_page_bucket_count; i++) {
        if (vm_page_buckets[i].hi_count) {
            numbuckets++;
            highsum += vm_page_buckets[i].hi_count;
            if (vm_page_buckets[i].hi_count > maxdepth)
                maxdepth = vm_page_buckets[i].hi_count;
        }
    }
    printf("Total number of buckets: %d\n", vm_page_bucket_count);
    printf("Number used buckets:     %d = %d%%\n",
        numbuckets, 100*numbuckets/vm_page_bucket_count);
    printf("Number unused buckets:   %d = %d%%\n",
        vm_page_bucket_count - numbuckets,
        100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
    printf("Sum of bucket max depth: %d\n", highsum);
    printf("Average bucket depth:    %d.%2d\n",
        highsum/vm_page_bucket_count,
        highsum%vm_page_bucket_count);
    printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *  The virtual page size is currently implemented as a runtime
 *  variable, but is constant once initialized using vm_set_page_size.
 *  This initialization must be done in the machine-dependent
 *  bootstrap sequence, before calling other machine-independent
 *  functions.
 *
 *  All references to the virtual page size outside this
 *  module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *  constants.
 */
#if defined(__arm__) || defined(__arm64__)
vm_size_t   page_size;
vm_size_t   page_mask;
int     page_shift;
#else
vm_size_t   page_size  = PAGE_SIZE;
vm_size_t   page_mask  = PAGE_MASK;
int     page_shift = PAGE_SHIFT;
#endif
/*
 *  Resident page structures are initialized from
 *  a template (see vm_page_alloc).
 *
 *  When adding a new field to the virtual memory
 *  object structure, be sure to add initialization
 *  (see vm_page_bootstrap).
 */
struct vm_page  vm_page_template;

vm_page_t   vm_pages = VM_PAGE_NULL;
vm_page_t   vm_page_array_beginning_addr;
vm_page_t   vm_page_array_ending_addr;
vm_page_t   vm_page_array_boundary;

unsigned int    vm_pages_count = 0;
ppnum_t     vm_page_lowest = 0;
/*
 *  Resident pages that represent real memory
 *  are allocated from a set of free lists,
 *  one per color.
 */
unsigned int    vm_colors;
unsigned int    vm_color_mask;          /* mask is == (vm_colors-1) */
unsigned int    vm_cache_geometry_colors = 0;   /* set by hw dependent code during startup */
unsigned int    vm_free_magazine_refill_limit = 0;


struct vm_page_queue_free_head {
    vm_page_queue_head_t    qhead;
} __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));

struct vm_page_queue_free_head  vm_page_queue_free[MAX_COLORS];


unsigned int    vm_page_free_wanted;
unsigned int    vm_page_free_wanted_privileged;
#if CONFIG_SECLUDED_MEMORY
unsigned int    vm_page_free_wanted_secluded;
#endif /* CONFIG_SECLUDED_MEMORY */
unsigned int    vm_page_free_count;
/*
 *  Occasionally, the virtual memory system uses
 *  resident page structures that do not refer to
 *  real pages, for example to leave a page with
 *  important state information in the VP table.
 *
 *  These page structures are allocated the way
 *  most other kernel structures are.
 */
zone_t  vm_page_array_zone;
zone_t  vm_page_zone;
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
lck_mtx_ext_t vm_page_alloc_lock_ext;

unsigned int    io_throttle_zero_fill;

unsigned int    vm_page_local_q_count = 0;
unsigned int    vm_page_local_q_soft_limit = 250;
unsigned int    vm_page_local_q_hard_limit = 500;
struct vplq     *vm_page_local_q = NULL;
/* N.B. Guard and fictitious pages must not
 * be assigned a zero phys_page value.
 */
/*
 *  Fictitious pages don't have a physical address,
 *  but we must initialize phys_page to something.
 *  For debugging, this should be a strange value
 *  that the pmap module can recognize in assertions.
 */
const ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *  Guard pages are not accessible so they don't
 *  need a physical address, but we need to enter
 *  one in the pmap.
 *  Let's make it recognizable and make sure that
 *  we don't use a real physical page with that
 *  physical address.
 */
const ppnum_t vm_page_guard_addr = (ppnum_t) -2;
/*
 *  Resident page structures are also chained on
 *  queues that are used by the page replacement
 *  system (pageout daemon).  These queues are
 *  defined here, but are shared by the pageout
 *  module.  The inactive queue is broken into
 *  file backed and anonymous for convenience, as the
 *  pageout daemon often assigns a higher
 *  importance to anonymous pages (less likely to pick).
 */
vm_page_queue_head_t    vm_page_queue_active __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
vm_page_queue_head_t    vm_page_queue_inactive __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
#if CONFIG_SECLUDED_MEMORY
vm_page_queue_head_t    vm_page_queue_secluded __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
#endif /* CONFIG_SECLUDED_MEMORY */
vm_page_queue_head_t    vm_page_queue_anonymous __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));  /* inactive memory queue for anonymous pages */
vm_page_queue_head_t    vm_page_queue_throttled __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));

queue_head_t    vm_objects_wired;

#if CONFIG_BACKGROUND_QUEUE
vm_page_queue_head_t    vm_page_queue_background __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
uint32_t    vm_page_background_target;
uint32_t    vm_page_background_count;
uint64_t    vm_page_background_promoted_count;

uint32_t    vm_page_background_internal_count;
uint32_t    vm_page_background_external_count;

uint32_t    vm_page_background_mode;
uint32_t    vm_page_background_exclude_external;
#endif
unsigned int    vm_page_active_count;
unsigned int    vm_page_inactive_count;
#if CONFIG_SECLUDED_MEMORY
unsigned int    vm_page_secluded_count;
unsigned int    vm_page_secluded_count_free;
unsigned int    vm_page_secluded_count_inuse;
#endif /* CONFIG_SECLUDED_MEMORY */
unsigned int    vm_page_anonymous_count;
unsigned int    vm_page_throttled_count;
unsigned int    vm_page_speculative_count;

unsigned int    vm_page_wire_count;
unsigned int    vm_page_wire_count_on_boot = 0;
unsigned int    vm_page_stolen_count;
unsigned int    vm_page_wire_count_initial;
unsigned int    vm_page_pages_initial;
unsigned int    vm_page_gobble_count = 0;

#define VM_PAGE_WIRE_COUNT_WARNING  0
#define VM_PAGE_GOBBLE_COUNT_WARNING    0

unsigned int    vm_page_purgeable_count = 0;    /* # of pages purgeable now */
unsigned int    vm_page_purgeable_wired_count = 0;  /* # of purgeable pages that are wired now */
uint64_t    vm_page_purged_count = 0;       /* total count of purged pages */

unsigned int    vm_page_xpmapped_external_count = 0;
unsigned int    vm_page_external_count = 0;
unsigned int    vm_page_internal_count = 0;
unsigned int    vm_page_pageable_external_count = 0;
unsigned int    vm_page_pageable_internal_count = 0;
#if DEVELOPMENT || DEBUG
unsigned int    vm_page_speculative_recreated = 0;
unsigned int    vm_page_speculative_created = 0;
unsigned int    vm_page_speculative_used = 0;
#endif

vm_page_queue_head_t    vm_page_queue_cleaned __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));

unsigned int    vm_page_cleaned_count = 0;
unsigned int    vm_pageout_enqueued_cleaned = 0;

uint64_t    max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t     max_valid_low_ppnum = 0xffffffff;
/*
 *  Several page replacement parameters are also
 *  shared with this module, so that page allocation
 *  (done here in vm_page_alloc) can trigger the
 *  pageout daemon.
 */
unsigned int    vm_page_free_target = 0;
unsigned int    vm_page_free_min = 0;
unsigned int    vm_page_throttle_limit = 0;
unsigned int    vm_page_inactive_target = 0;
#if CONFIG_SECLUDED_MEMORY
unsigned int    vm_page_secluded_target = 0;
#endif /* CONFIG_SECLUDED_MEMORY */
unsigned int    vm_page_anonymous_min = 0;
unsigned int    vm_page_inactive_min = 0;
unsigned int    vm_page_free_reserved = 0;
unsigned int    vm_page_throttle_count = 0;


/*
 * The VM system has a couple of heuristics for deciding
 * that pages are "uninteresting" and should be placed
 * on the inactive queue as likely candidates for replacement.
 * These variables let the heuristics be controlled at run-time
 * to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;
/*
 *  vm_set_page_size:
 *
 *  Sets the page size, perhaps based upon the memory
 *  size.  Must be called before any use of page-size
 *  dependent functions.
 *
 *  Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
    page_size  = PAGE_SIZE;
    page_mask  = PAGE_MASK;
    page_shift = PAGE_SHIFT;

    if ((page_mask & page_size) != 0)
        panic("vm_set_page_size: page size not a power of two");

    for (page_shift = 0; ; page_shift++)
        if ((1U << page_shift) == page_size)
            break;
}
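
/*
 * Illustrative note (not in the original source): with the usual 4 KB page,
 * PAGE_SIZE == 0x1000 and PAGE_MASK == 0x0FFF, so (page_mask & page_size) == 0
 * and the loop above stops at page_shift == 12, since 1U << 12 == 4096.
 * A non-power-of-two page size would fail the mask test and panic.
 */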
#if defined (__x86_64__)

#define MAX_CLUMP_SIZE      16
#define DEFAULT_CLUMP_SIZE  4

unsigned int vm_clump_size, vm_clump_mask, vm_clump_shift, vm_clump_promote_threshold;

#if DEVELOPMENT || DEBUG
unsigned long vm_clump_stats[MAX_CLUMP_SIZE+1];
unsigned long vm_clump_allocs, vm_clump_inserts, vm_clump_inrange, vm_clump_promotes;

static inline void vm_clump_update_stats(unsigned int c) {
    assert(c <= vm_clump_size);
    if (c > 0 && c <= vm_clump_size) vm_clump_stats[c] += c;
    vm_clump_allocs += c;
}
#endif  /* if DEVELOPMENT || DEBUG */
/* Called once to setup the VM clump knobs */
static void
vm_page_setup_clump( void )
{
    unsigned int override, n;

    vm_clump_size = DEFAULT_CLUMP_SIZE;
    if ( PE_parse_boot_argn("clump_size", &override, sizeof (override)) ) vm_clump_size = override;

    if (vm_clump_size > MAX_CLUMP_SIZE) panic("vm_page_setup_clump:: clump_size is too large!");
    if (vm_clump_size < 1) panic("vm_page_setup_clump:: clump_size must be >= 1");
    if ((vm_clump_size & (vm_clump_size-1)) != 0) panic("vm_page_setup_clump:: clump_size must be a power of 2");

    vm_clump_promote_threshold = vm_clump_size;
    vm_clump_mask = vm_clump_size - 1;
    for (vm_clump_shift = 0, n = vm_clump_size; n > 1; n >>= 1, vm_clump_shift++);

#if DEVELOPMENT || DEBUG
    bzero(vm_clump_stats, sizeof(vm_clump_stats));
    vm_clump_allocs = vm_clump_inserts = vm_clump_inrange = vm_clump_promotes = 0;
#endif  /* if DEVELOPMENT || DEBUG */
}

#endif  /* #if defined (__x86_64__) */
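
/*
 * Illustrative note (not in the original source): with the default clump
 * size of 4 pages, vm_page_setup_clump() above leaves
 *
 *  vm_clump_size              = 4
 *  vm_clump_mask              = 3   (0b011)
 *  vm_clump_shift             = 2   (since 4 >> 2 == 1)
 *  vm_clump_promote_threshold = 4
 *
 * i.e. a physical page number maps to its clump index by shifting right by
 * vm_clump_shift, and to its offset within the clump by masking with
 * vm_clump_mask.
 */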
#define COLOR_GROUPS_TO_STEAL   4

/* Called once during startup, once the cache geometry is known.
 */
static void
vm_page_set_colors( void )
{
    unsigned int n, override;

#if defined (__x86_64__)
    /* adjust #colors because we need to color outside the clump boundary */
    vm_cache_geometry_colors >>= vm_clump_shift;
#endif
    if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )   /* colors specified as a boot-arg? */
        n = override;
    else if ( vm_cache_geometry_colors )        /* do we know what the cache geometry is? */
        n = vm_cache_geometry_colors;
    else    n = DEFAULT_COLORS;         /* use default if all else fails */

    if ( n > MAX_COLORS )
        n = MAX_COLORS;

    /* the count must be a power of 2 */
    if ( ( n & (n - 1)) != 0 )
        n = DEFAULT_COLORS;         /* use default if all else fails */

    vm_colors = n;
    vm_color_mask = n - 1;

    vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;

#if defined (__x86_64__)
    /* adjust for reduction in colors due to clumping and multiple cores */
    vm_free_magazine_refill_limit *= (vm_clump_size * real_ncpus);
#endif
}
lck_grp_t       vm_page_lck_grp_free;
lck_grp_t       vm_page_lck_grp_queue;
lck_grp_t       vm_page_lck_grp_local;
lck_grp_t       vm_page_lck_grp_purge;
lck_grp_t       vm_page_lck_grp_alloc;
lck_grp_t       vm_page_lck_grp_bucket;
lck_grp_attr_t  vm_page_lck_grp_attr;
lck_attr_t      vm_page_lck_attr;


__private_extern__ void
vm_page_init_lck_grp(void)
{
    /*
     * initialize the vm_page lock world
     */
    lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
    lck_attr_setdefault(&vm_page_lck_attr);
    lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);

    vm_compressor_init_locks();
}

#define ROUNDUP_NEXTP2(X) (1U << (32 - __builtin_clz((X) - 1)))
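
/*
 * Illustrative note (not in the original source): ROUNDUP_NEXTP2 rounds a
 * size up to the next power of two using the count of leading zeros.
 * For example, ROUNDUP_NEXTP2(24) evaluates as
 *
 *  __builtin_clz(24 - 1) == __builtin_clz(23) == 27    (23 is 0b10111)
 *  1U << (32 - 27)       == 1U << 5           == 32
 *
 * A size that is already a power of two is returned unchanged
 * (e.g. ROUNDUP_NEXTP2(32) == 32); passing 0 or 1 would be undefined here.
 */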
void
vm_page_init_local_q()
{
    unsigned int    num_cpus;
    unsigned int    i;
    struct vplq     *t_local_q;

    num_cpus = ml_get_max_cpus();

    /*
     * no point in this for a uni-processor system
     */
    if (num_cpus >= 2) {
#if KASAN
        /* KASAN breaks the expectation of a size-aligned object by adding a
         * redzone, so explicitly align. */
        t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq) + VM_PACKED_POINTER_ALIGNMENT);
        t_local_q = (void *)(((uintptr_t)t_local_q + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT-1));
#else
        /* round the size up to the nearest power of two */
        t_local_q = (struct vplq *)kalloc(ROUNDUP_NEXTP2(num_cpus * sizeof(struct vplq)));
#endif

        for (i = 0; i < num_cpus; i++) {
            struct vpl  *lq;

            lq = &t_local_q[i].vpl_un.vpl;
            VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
            vm_page_queue_init(&lq->vpl_queue);
            lq->vpl_internal_count = 0;
            lq->vpl_external_count = 0;
        }
        vm_page_local_q_count = num_cpus;

        vm_page_local_q = (struct vplq *)t_local_q;
    }
}
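
/*
 * Note (explanatory, not in the original source): the table allocated above
 * holds one struct vplq per CPU, each with its own lock and page queue.
 * The soft/hard limits declared earlier (vm_page_local_q_soft_limit = 250,
 * vm_page_local_q_hard_limit = 500) bound how many pages a CPU may hold on
 * its local queue, presumably so that pages can be staged per-CPU and moved
 * to the global queues in batches instead of taking the global page-queue
 * lock for every page.
 */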
/*
 * vm_init_before_launchd
 *
 * This should be called right before launchd is loaded.
 */
void
vm_init_before_launchd()
{
    vm_page_wire_count_on_boot = vm_page_wire_count;
}
/*
 *  vm_page_bootstrap:
 *
 *  Initializes the resident memory module.
 *
 *  Allocates memory for the page cells, and
 *  for the object/offset-to-page hash table headers.
 *  Each page cell is initialized and placed on the free list.
 *  Returns the range of available kernel virtual memory.
 */

void
vm_page_bootstrap(
    vm_offset_t     *startp,
    vm_offset_t     *endp)
{
    vm_page_t       m;
    unsigned int    i;
    unsigned int    log1;
    unsigned int    log2;
    unsigned int    size;

    /*
     * Initialize the vm_page template.
     */

    m = &vm_page_template;
    bzero(m, sizeof (*m));

#if CONFIG_BACKGROUND_QUEUE
    m->vm_page_backgroundq.next = 0;
    m->vm_page_backgroundq.prev = 0;
    m->vm_page_in_background = FALSE;
    m->vm_page_on_backgroundq = FALSE;
#endif

    VM_PAGE_ZERO_PAGEQ_ENTRY(m);

    m->vm_page_object = 0;          /* reset later */
    m->offset = (vm_object_offset_t) -1;    /* reset later */

    m->vm_page_q_state = VM_PAGE_NOT_ON_Q;

    m->reference = FALSE;

    m->__unused_pageq_bits = 0;

#if !defined(__arm__) && !defined(__arm64__)
    VM_PAGE_SET_PHYS_PAGE(m, 0);        /* reset later */
#endif
    m->fictitious = FALSE;
    m->free_when_done = FALSE;
    m->clustered = FALSE;
    m->overwriting = FALSE;
    m->cs_validated = FALSE;
    m->cs_tainted = FALSE;
    m->written_by_kernel = FALSE;
    m->__unused_object_bits = 0;
    /*
     *  Initialize the page queues.
     */
    vm_page_init_lck_grp();

    lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
    lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
    lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

    for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
        int group;

        purgeable_queues[i].token_q_head = 0;
        purgeable_queues[i].token_q_tail = 0;
        for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
            queue_init(&purgeable_queues[i].objq[group]);

        purgeable_queues[i].type = i;
        purgeable_queues[i].new_pages = 0;
#if MACH_ASSERT
        purgeable_queues[i].debug_count_tokens = 0;
        purgeable_queues[i].debug_count_objects = 0;
#endif
    }
    purgeable_nonvolatile_count = 0;
    queue_init(&purgeable_nonvolatile_queue);
    for (i = 0; i < MAX_COLORS; i++ )
        vm_page_queue_init(&vm_page_queue_free[i].qhead);

    vm_page_queue_init(&vm_lopage_queue_free);
    vm_page_queue_init(&vm_page_queue_active);
    vm_page_queue_init(&vm_page_queue_inactive);
#if CONFIG_SECLUDED_MEMORY
    vm_page_queue_init(&vm_page_queue_secluded);
#endif /* CONFIG_SECLUDED_MEMORY */
    vm_page_queue_init(&vm_page_queue_cleaned);
    vm_page_queue_init(&vm_page_queue_throttled);
    vm_page_queue_init(&vm_page_queue_anonymous);
    queue_init(&vm_objects_wired);

    for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
        vm_page_queue_init(&vm_page_queue_speculative[i].age_q);

        vm_page_queue_speculative[i].age_ts.tv_sec = 0;
        vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
    }
#if CONFIG_BACKGROUND_QUEUE
    vm_page_queue_init(&vm_page_queue_background);

    vm_page_background_count = 0;
    vm_page_background_internal_count = 0;
    vm_page_background_external_count = 0;
    vm_page_background_promoted_count = 0;

    vm_page_background_target = (unsigned int)(atop_64(max_mem) / 25);

    if (vm_page_background_target > VM_PAGE_BACKGROUND_TARGET_MAX)
        vm_page_background_target = VM_PAGE_BACKGROUND_TARGET_MAX;

    vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
    vm_page_background_exclude_external = 0;

    PE_parse_boot_argn("vm_page_bg_mode", &vm_page_background_mode, sizeof(vm_page_background_mode));
    PE_parse_boot_argn("vm_page_bg_exclude_external", &vm_page_background_exclude_external, sizeof(vm_page_background_exclude_external));
    PE_parse_boot_argn("vm_page_bg_target", &vm_page_background_target, sizeof(vm_page_background_target));

    if (vm_page_background_mode > VM_PAGE_BG_LEVEL_1)
        vm_page_background_mode = VM_PAGE_BG_LEVEL_1;
#endif
    vm_page_free_wanted = 0;
    vm_page_free_wanted_privileged = 0;
#if CONFIG_SECLUDED_MEMORY
    vm_page_free_wanted_secluded = 0;
#endif /* CONFIG_SECLUDED_MEMORY */

#if defined (__x86_64__)
    /* this must be called before vm_page_set_colors() */
    vm_page_setup_clump();
#endif

    vm_page_set_colors();
    bzero(vm_page_inactive_states, sizeof(vm_page_inactive_states));
    vm_page_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
    vm_page_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
    vm_page_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;

    bzero(vm_page_pageable_states, sizeof(vm_page_pageable_states));
    vm_page_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
    vm_page_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
    vm_page_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
    vm_page_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
    vm_page_pageable_states[VM_PAGE_ON_SPECULATIVE_Q] = 1;
    vm_page_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
#if CONFIG_SECLUDED_MEMORY
    vm_page_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
#endif /* CONFIG_SECLUDED_MEMORY */

    bzero(vm_page_non_speculative_pageable_states, sizeof(vm_page_non_speculative_pageable_states));
    vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
    vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
    vm_page_non_speculative_pageable_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
    vm_page_non_speculative_pageable_states[VM_PAGE_ON_ACTIVE_Q] = 1;
    vm_page_non_speculative_pageable_states[VM_PAGE_ON_THROTTLED_Q] = 1;
#if CONFIG_SECLUDED_MEMORY
    vm_page_non_speculative_pageable_states[VM_PAGE_ON_SECLUDED_Q] = 1;
#endif /* CONFIG_SECLUDED_MEMORY */

    bzero(vm_page_active_or_inactive_states, sizeof(vm_page_active_or_inactive_states));
    vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_INTERNAL_Q] = 1;
    vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_EXTERNAL_Q] = 1;
    vm_page_active_or_inactive_states[VM_PAGE_ON_INACTIVE_CLEANED_Q] = 1;
    vm_page_active_or_inactive_states[VM_PAGE_ON_ACTIVE_Q] = 1;
#if CONFIG_SECLUDED_MEMORY
    vm_page_active_or_inactive_states[VM_PAGE_ON_SECLUDED_Q] = 1;
#endif /* CONFIG_SECLUDED_MEMORY */
    for (i = 0; i < VM_KERN_MEMORY_FIRST_DYNAMIC; i++)
    {
        vm_allocation_sites_static[i].refcount = 2;
        vm_allocation_sites_static[i].tag = i;
        vm_allocation_sites[i] = &vm_allocation_sites_static[i];
    }
    vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].refcount = 2;
    vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC].tag = VM_KERN_MEMORY_ANY;
    vm_allocation_sites[VM_KERN_MEMORY_ANY] = &vm_allocation_sites_static[VM_KERN_MEMORY_FIRST_DYNAMIC];
    /*
     *  Steal memory for the map and zone subsystems.
     */
    kernel_debug_string_early("vm_map_steal_memory");
    vm_map_steal_memory();
    /*
     *  Allocate (and initialize) the virtual-to-physical
     *  table hash buckets.
     *
     *  The number of buckets should be a power of two to
     *  get a good hash function.  The following computation
     *  chooses the first power of two that is greater
     *  than the number of physical pages in the system.
     */

    if (vm_page_bucket_count == 0) {
        unsigned int npages = pmap_free_pages();

        vm_page_bucket_count = 1;
        while (vm_page_bucket_count < npages)
            vm_page_bucket_count <<= 1;
    }
    vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;

    vm_page_hash_mask = vm_page_bucket_count - 1;

    /*
     *  Calculate object shift value for hashing algorithm:
     *      O = log2(sizeof(struct vm_object))
     *      B = log2(vm_page_bucket_count)
     *      hash shifts the object left by
     *      B/2 - O
     */
    size = vm_page_bucket_count;
    for (log1 = 0; size > 1; log1++)
        size /= 2;
    size = sizeof(struct vm_object);
    for (log2 = 0; size > 1; log2++)
        size /= 2;
    vm_page_hash_shift = log1/2 - log2 + 1;

    vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);   /* Get (ceiling of sqrt of table size) */
    vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);  /* Get (ceiling of quadroot of table size) */
    vm_page_bucket_hash |= 1;           /* Set bit and add 1 - always must be 1 to ensure unique series */

    if (vm_page_hash_mask & vm_page_bucket_count)
        printf("vm_page_bootstrap: WARNING -- strange page hash\n");
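
    /*
     * Illustrative note (not in the original source): suppose the system
     * has ~800,000 physical pages.  The loop above picks the first power
     * of two above that, vm_page_bucket_count = 2^20, so log1 = 20 and
     * vm_page_hash_mask = 0xFFFFF.  The bucket hash then becomes
     *
     *  vm_page_bucket_hash = (1 << 10) | (1 << 5) | 1 = 0x421
     *
     * roughly the square root and fourth root of the table size OR'ed
     * with 1, which is what vm_page_hash() below multiplies the object
     * pointer by.  With 16 buckets per lock, vm_page_bucket_lock_count
     * would be 2^20 / 16 = 65,536 spin locks.
     */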
#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
    /*
     * Allocate a decoy set of page buckets, to detect
     * any stomping there.
     */
    vm_page_fake_buckets = (vm_page_bucket_t *)
        pmap_steal_memory(vm_page_bucket_count *
                  sizeof(vm_page_bucket_t));
    vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
    vm_page_fake_buckets_end =
        vm_map_round_page((vm_page_fake_buckets_start +
                   (vm_page_bucket_count *
                    sizeof (vm_page_bucket_t))),
                  PAGE_MASK);
    char *cp;
    for (cp = (char *)vm_page_fake_buckets_start;
         cp < (char *)vm_page_fake_buckets_end;
         cp++) {
        *cp = 0x5a;
    }
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */
    kernel_debug_string_early("vm_page_buckets");
    vm_page_buckets = (vm_page_bucket_t *)
        pmap_steal_memory(vm_page_bucket_count *
                  sizeof(vm_page_bucket_t));

    kernel_debug_string_early("vm_page_bucket_locks");
    vm_page_bucket_locks = (lck_spin_t *)
        pmap_steal_memory(vm_page_bucket_lock_count *
                  sizeof(lck_spin_t));

    for (i = 0; i < vm_page_bucket_count; i++) {
        vm_page_bucket_t *bucket = &vm_page_buckets[i];

        bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
#if MACH_PAGE_HASH_STATS
        bucket->cur_count = 0;
        bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
    }

    for (i = 0; i < vm_page_bucket_lock_count; i++)
        lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);

    lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
    lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);

#if VM_PAGE_BUCKETS_CHECK
    vm_page_buckets_check_ready = TRUE;
#endif /* VM_PAGE_BUCKETS_CHECK */
    /*
     *  Machine-dependent code allocates the resident page table.
     *  It uses vm_page_init to initialize the page frames.
     *  The code also returns to us the virtual space available
     *  to the kernel.  We don't trust the pmap module
     *  to get the alignment right.
     */

    kernel_debug_string_early("pmap_startup");
    pmap_startup(&virtual_space_start, &virtual_space_end);
    virtual_space_start = round_page(virtual_space_start);
    virtual_space_end = trunc_page(virtual_space_end);

    *startp = virtual_space_start;
    *endp = virtual_space_end;

    /*
     *  Compute the initial "wire" count.
     *  Up until now, the pages which have been set aside are not under
     *  the VM system's control, so although they aren't explicitly
     *  wired, they nonetheless can't be moved. At this moment,
     *  all VM managed pages are "free", courtesy of pmap_startup.
     */
    assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
    vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;    /* initial value */
#if CONFIG_SECLUDED_MEMORY
    vm_page_wire_count -= vm_page_secluded_count;
#endif
    vm_page_wire_count_initial = vm_page_wire_count;
    vm_page_pages_initial = vm_page_pages;

    printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
           vm_page_free_count, vm_page_wire_count);

    kernel_debug_string_early("vm_page_bootstrap complete");
    simple_lock_init(&vm_paging_lock, 0);
}
#ifndef MACHINE_PAGES
/*
 *  We implement pmap_steal_memory and pmap_startup with the help
 *  of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
    vm_size_t size)
{
    kern_return_t   kr;
    vm_offset_t     addr, vaddr;
    ppnum_t         phys_page;

    /*
     *  We round the size to a multiple of the pointer size.
     */

    size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);

    /*
     *  If this is the first call to pmap_steal_memory,
     *  we have to initialize ourself.
     */

    if (virtual_space_start == virtual_space_end) {
        pmap_virtual_space(&virtual_space_start, &virtual_space_end);

        /*
         *  The initial values must be aligned properly, and
         *  we don't trust the pmap module to do it right.
         */

        virtual_space_start = round_page(virtual_space_start);
        virtual_space_end = trunc_page(virtual_space_end);
    }

    /*
     *  Allocate virtual memory for this request.
     */

    addr = virtual_space_start;
    virtual_space_start += size;

    //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);    /* (TEST/DEBUG) */

    /*
     *  Allocate and map physical pages to back new virtual pages.
     */

    for (vaddr = round_page(addr);
         vaddr < addr + size;
         vaddr += PAGE_SIZE) {

        if (!pmap_next_page_hi(&phys_page))
            panic("pmap_steal_memory() size: 0x%llx\n", (uint64_t)size);

        /*
         *  XXX Logically, these mappings should be wired,
         *  but some pmap modules barf if they are.
         */
#if defined(__LP64__)
#ifdef __arm64__
        /* ARM64_TODO: verify that we really don't need this */
#else
        pmap_pre_expand(kernel_pmap, vaddr);
#endif
#endif

        kr = pmap_enter(kernel_pmap, vaddr, phys_page,
                VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
                VM_WIMG_USE_DEFAULT, FALSE);

        if (kr != KERN_SUCCESS) {
            panic("pmap_steal_memory() pmap_enter failed, vaddr=%#lx, phys_page=%u",
                  (unsigned long)vaddr, phys_page);
        }

        /*
         * Account for newly stolen memory
         */
        vm_page_wire_count++;
        vm_page_stolen_count++;
    }

#if KASAN
    kasan_notify_address(round_page(addr), size);
#endif
    return (void *) addr;
}
#if CONFIG_SECLUDED_MEMORY
/* boot-args to control secluded memory */
unsigned int secluded_mem_mb = 0;   /* # of MBs of RAM to seclude */
int secluded_for_iokit = 1;     /* IOKit can use secluded memory */
int secluded_for_apps = 1;      /* apps can use secluded memory */
int secluded_for_filecache = 2;     /* filecache can use secluded memory */

int secluded_for_fbdp = 0;
#endif /* CONFIG_SECLUDED_MEMORY */


#if defined(__arm__) || defined(__arm64__)
extern void patch_low_glo_vm_page_info(void *, void *, uint32_t);
unsigned int vm_first_phys_ppnum = 0;
#endif
void vm_page_release_startup(vm_page_t mem);

void
pmap_startup(
    vm_offset_t *startp,
    vm_offset_t *endp)
{
    unsigned int i, npages, pages_initialized, fill, fillval;
    ppnum_t     phys_page;
    addr64_t    tmpaddr;
#if defined(__LP64__)
    /*
     * make sure we are aligned on a 64 byte boundary
     * for VM_PAGE_PACK_PTR (it clips off the low-order
     * 6 bits of the pointer)
     */
    if (virtual_space_start != virtual_space_end)
        virtual_space_start = round_page(virtual_space_start);
#endif

    /*
     *  We calculate how many page frames we will have
     *  and then allocate the page structures in one chunk.
     */

    tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;    /* Get the amount of memory left */
    tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);  /* Account for any slop */
    npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));   /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

    vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
    /*
     *  Initialize the page frames.
     */
    kernel_debug_string_early("Initialize the page frames");

    vm_page_array_beginning_addr = &vm_pages[0];
    vm_page_array_ending_addr = &vm_pages[npages];

    for (i = 0, pages_initialized = 0; i < npages; i++) {
        if (!pmap_next_page(&phys_page))
            break;
#if defined(__arm__) || defined(__arm64__)
        if (pages_initialized == 0) {
            vm_first_phys_ppnum = phys_page;
            patch_low_glo_vm_page_info((void *)vm_page_array_beginning_addr, (void *)vm_page_array_ending_addr, vm_first_phys_ppnum);
        }
        assert((i + vm_first_phys_ppnum) == phys_page);
#endif
        if (pages_initialized == 0 || phys_page < vm_page_lowest)
            vm_page_lowest = phys_page;

        vm_page_init(&vm_pages[i], phys_page, FALSE);
        pages_initialized++;
    }
    vm_pages_count = pages_initialized;
    vm_page_array_boundary = &vm_pages[pages_initialized];

#if defined(__LP64__)

    if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0]))) != &vm_pages[0])
        panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);

    if ((vm_page_t)(VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1]))) != &vm_pages[vm_pages_count-1])
        panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
#endif
    kernel_debug_string_early("page fill/release");
    /*
     * Check if we want to initialize pages to a known value
     */
    fill = 0;                               /* Assume no fill */
    if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;   /* Set fill */
#if DEBUG
    /* This slows down booting the DEBUG kernel, particularly on
     * large memory systems, but is worthwhile in deterministically
     * trapping uninitialized memory usage.
     */
    if (fill == 0) {
        fill = 1;
        fillval = 0xDEB8F177;
    }
#endif
    if (fill)
        kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
#if CONFIG_SECLUDED_MEMORY
    /* default: no secluded mem */
    secluded_mem_mb = 0;
    if (max_mem > 1*1024*1024*1024) {
        /* default to 90MB for devices with > 1GB of RAM */
        secluded_mem_mb = 90;
    }
    /* override with value from device tree, if provided */
    PE_get_default("kern.secluded_mem_mb",
               &secluded_mem_mb, sizeof(secluded_mem_mb));
    /* override with value from boot-args, if provided */
    PE_parse_boot_argn("secluded_mem_mb",
               &secluded_mem_mb,
               sizeof (secluded_mem_mb));

    vm_page_secluded_target = (unsigned int)
        ((secluded_mem_mb * 1024ULL * 1024ULL) / PAGE_SIZE);
    PE_parse_boot_argn("secluded_for_iokit",
               &secluded_for_iokit,
               sizeof (secluded_for_iokit));
    PE_parse_boot_argn("secluded_for_apps",
               &secluded_for_apps,
               sizeof (secluded_for_apps));
    PE_parse_boot_argn("secluded_for_filecache",
               &secluded_for_filecache,
               sizeof (secluded_for_filecache));
    PE_parse_boot_argn("secluded_for_fbdp",
               &secluded_for_fbdp,
               sizeof (secluded_for_fbdp));
#endif /* CONFIG_SECLUDED_MEMORY */
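
    /*
     * Illustrative note (not in the original source): the default of
     * 90 MB of secluded memory converts to
     *
     *  (90 * 1024 * 1024) / PAGE_SIZE
     *
     * pages, i.e. 23,040 pages with 4 KB pages or 5,760 pages with
     * 16 KB pages; vm_page_secluded_target is simply that page count.
     */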
    // -debug code remove
    if (2 == vm_himemory_mode) {
        // free low -> high so high is preferred
        for (i = 1; i <= pages_initialized; i++) {
            if (fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval);   /* Fill the page with a known value if requested at boot */
            vm_page_release_startup(&vm_pages[i - 1]);
        }
    }
    else
    // debug code remove-

    /*
     * Release pages in reverse order so that physical pages
     * initially get allocated in ascending addresses. This keeps
     * the devices (which must address physical memory) happy if
     * they require several consecutive pages.
     */
    for (i = pages_initialized; i > 0; i--) {
        if (fill) fillPage(VM_PAGE_GET_PHYS_PAGE(&vm_pages[i - 1]), fillval);   /* Fill the page with a known value if requested at boot */
        vm_page_release_startup(&vm_pages[i - 1]);
    }

    VM_CHECK_MEMORYSTATUS;
#if 0
    {
        vm_page_t xx, xxo, xxl;
        int i, j, k, l;

        j = 0;                      /* (BRINGUP) */
        xxl = 0;

        for ( i = 0; i < vm_colors; i++ ) {
            queue_iterate(&vm_page_queue_free[i].qhead,
                      xx,
                      vm_page_t,
                      pageq) {          /* BRINGUP */
                j++;                /* (BRINGUP) */
                if (j > vm_page_free_count) {   /* (BRINGUP) */
                    panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
                }

                l = vm_page_free_count - j; /* (BRINGUP) */
                k = 0;              /* (BRINGUP) */

                if (((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

                for (xxo = xx->pageq.next; xxo != &vm_page_queue_free[i].qhead; xxo = xxo->pageq.next) {    /* (BRINGUP) */
                    k++;
                    if (k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
                    if ((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {    /* (BRINGUP) */
                        panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
                    }
                }

                xxl = xx;
            }
        }

        if (j != vm_page_free_count) {      /* (BRINGUP) */
            panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
        }
    }
#endif
    /*
     *  We have to re-align virtual_space_start,
     *  because pmap_steal_memory has been using it.
     */

    virtual_space_start = round_page(virtual_space_start);

    *startp = virtual_space_start;
    *endp = virtual_space_end;
}
#endif  /* MACHINE_PAGES */
/*
 *  Routine:    vm_page_module_init
 *  Purpose:
 *      Second initialization pass, to be done after
 *      the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
    uint64_t    vm_page_zone_pages, vm_page_array_zone_data_size;
    vm_size_t   vm_page_with_ppnum_size;

    vm_page_array_zone = zinit((vm_size_t) sizeof(struct vm_page),
                 0, PAGE_SIZE, "vm pages array");

    zone_change(vm_page_array_zone, Z_CALLERACCT, FALSE);
    zone_change(vm_page_array_zone, Z_EXPAND, FALSE);
    zone_change(vm_page_array_zone, Z_EXHAUST, TRUE);
    zone_change(vm_page_array_zone, Z_FOREIGN, TRUE);
    zone_change(vm_page_array_zone, Z_GZALLOC_EXEMPT, TRUE);
    /*
     * Adjust zone statistics to account for the real pages allocated
     * in vm_page_create(). [Q: is this really what we want?]
     */
    vm_page_array_zone->count += vm_page_pages;
    vm_page_array_zone->sum_count += vm_page_pages;
    vm_page_array_zone_data_size = vm_page_pages * vm_page_array_zone->elem_size;
    vm_page_array_zone->cur_size += vm_page_array_zone_data_size;
    vm_page_zone_pages = ((round_page(vm_page_array_zone_data_size)) / PAGE_SIZE);
    OSAddAtomic64(vm_page_zone_pages, &(vm_page_array_zone->page_count));
    /* since zone accounts for these, take them out of stolen */
    VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);

    vm_page_with_ppnum_size = (sizeof(struct vm_page_with_ppnum) + (VM_PACKED_POINTER_ALIGNMENT-1)) & ~(VM_PACKED_POINTER_ALIGNMENT - 1);

    vm_page_zone = zinit(vm_page_with_ppnum_size,
                 0, PAGE_SIZE, "vm pages");

    zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
    zone_change(vm_page_zone, Z_EXPAND, FALSE);
    zone_change(vm_page_zone, Z_EXHAUST, TRUE);
    zone_change(vm_page_zone, Z_FOREIGN, TRUE);
    zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
    zone_change(vm_page_zone, Z_ALIGNMENT_REQUIRED, TRUE);
}
/*
 *  Routine:    vm_page_create
 *  Purpose:
 *      After the VM system is up, machine-dependent code
 *      may stumble across more physical memory.  For example,
 *      memory that it was reserving for a frame buffer.
 *      vm_page_create turns this memory into available pages.
 */

void
vm_page_create(
    ppnum_t start,
    ppnum_t end)
{
    ppnum_t     phys_page;
    vm_page_t   m;

    for (phys_page = start;
         phys_page < end;
         phys_page++) {
        while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
            == VM_PAGE_NULL)
            vm_page_more_fictitious();

        m->fictitious = FALSE;
        pmap_clear_noencrypt(phys_page);

        vm_page_release(m, FALSE);
    }
}
/*
 *  vm_page_hash:
 *
 *  Distributes the object/offset key pair among hash buckets.
 *
 *  NOTE:   The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
    ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
    & vm_page_hash_mask)
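
/*
 * Note (explanatory, not in the original source): vm_page_hash() mixes the
 * two halves of the key by multiplying the object pointer by
 * vm_page_bucket_hash (the sqrt/quadroot constant computed in
 * vm_page_bootstrap) and adding the page index of the offset XOR'ed with the
 * same constant, then masking down to the table size.  A rough sketch of the
 * expansion, with illustrative values only:
 *
 *  hash = (((uintptr_t)object * 0x421) +
 *          ((uint32_t)atop_64(offset) ^ 0x421)) & vm_page_hash_mask;
 *
 * Because vm_page_hash_mask is vm_page_bucket_count - 1 and the bucket count
 * is a power of two, the mask is equivalent to a modulo by the table size.
 */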
/*
 *  vm_page_insert:     [ internal use only ]
 *
 *  Inserts the given mem entry into the
 *  object/object-page table and object list.
 *
 *  The object must be locked.
 */
void
vm_page_insert(
    vm_page_t       mem,
    vm_object_t     object,
    vm_object_offset_t  offset)
{
    vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
}

void
vm_page_insert_wired(
    vm_page_t       mem,
    vm_object_t     object,
    vm_object_offset_t  offset,
    vm_tag_t        tag)
{
    vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
}
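
/*
 * Note (explanatory, not in the original source): the two wrappers above
 * differ only in the tag passed to vm_page_insert_internal().  Reading the
 * parameter list of that routine, the boolean arguments are, in order:
 * queues_lock_held, insert_in_hash, batch_pmap_op and batch_accounting,
 * so both wrappers insert into the hash table (TRUE) without holding the
 * page-queue lock, without batching the pmap cache-attribute update, and
 * with per-page internal/external accounting; the trailing NULL means no
 * delayed ledger update is requested.
 */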
void
vm_page_insert_internal(
    vm_page_t       mem,
    vm_object_t     object,
    vm_object_offset_t  offset,
    vm_tag_t        tag,
    boolean_t       queues_lock_held,
    boolean_t       insert_in_hash,
    boolean_t       batch_pmap_op,
    boolean_t       batch_accounting,
    uint64_t        *delayed_ledger_update)
{
    vm_page_bucket_t    *bucket;
    lck_spin_t      *bucket_lock;
    int         hash_id;
    task_t          owner;

    XPR(XPR_VM_PAGE,
        "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
        object, offset, mem, 0,0);
#if 0
    /*
     * we may not hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(mem);
#endif

    assert(page_aligned(offset));

    assert(!VM_PAGE_WIRED(mem) || mem->private || mem->fictitious || (tag != VM_KERN_MEMORY_NONE));

    /* the vm_submap_object is only a placeholder for submaps */
    assert(object != vm_submap_object);

    vm_object_lock_assert_exclusive(object);
    LCK_MTX_ASSERT(&vm_page_queue_lock,
               queues_lock_held ? LCK_MTX_ASSERT_OWNED
                        : LCK_MTX_ASSERT_NOTOWNED);

    if (queues_lock_held == FALSE)
        assert(!VM_PAGE_PAGEABLE(mem));
    if (insert_in_hash == TRUE) {
#if DEBUG || VM_PAGE_CHECK_BUCKETS
        if (mem->tabled || mem->vm_page_object)
            panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
                  "already in (obj=%p,off=0x%llx)",
                  mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
#endif
        if (object->internal && (offset >= object->vo_size)) {
            panic("vm_page_insert_internal: (page=%p,obj=%p,off=0x%llx,size=0x%llx) inserted at offset past object bounds",
                  mem, object, offset, object->vo_size);
        }

        assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);

        /*
         *  Record the object/offset pair in this page
         */

        mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
        mem->offset = offset;

#if CONFIG_SECLUDED_MEMORY
        if (object->eligible_for_secluded) {
            vm_page_secluded.eligible_for_secluded++;
        }
#endif /* CONFIG_SECLUDED_MEMORY */

        /*
         *  Insert it into the object_object/offset hash table
         */
        hash_id = vm_page_hash(object, offset);
        bucket = &vm_page_buckets[hash_id];
        bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

        lck_spin_lock(bucket_lock);

        mem->next_m = bucket->page_list;
        bucket->page_list = VM_PAGE_PACK_PTR(mem);
        assert(mem == (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list)));

#if MACH_PAGE_HASH_STATS
        if (++bucket->cur_count > bucket->hi_count)
            bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */
        mem->hashed = TRUE;
        lck_spin_unlock(bucket_lock);
    }
    {
        unsigned int    cache_attr;

        cache_attr = object->wimg_bits & VM_WIMG_MASK;

        if (cache_attr != VM_WIMG_USE_DEFAULT) {
            PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
        }
    }
    /*
     *  Now link into the object's list of backed pages.
     */
    vm_page_queue_enter(&object->memq, mem, vm_page_t, listq);
    object->memq_hint = mem;
    mem->tabled = TRUE;

    /*
     *  Show that the object has one more resident page.
     */

    object->resident_page_count++;
    if (VM_PAGE_WIRED(mem)) {
        assert(mem->wire_count > 0);
        VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
        VM_OBJECT_WIRED_PAGE_ADD(object, mem);
        VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
    }
    assert(object->resident_page_count >= object->wired_page_count);

    if (batch_accounting == FALSE) {
        if (object->internal) {
            OSAddAtomic(1, &vm_page_internal_count);
        } else {
            OSAddAtomic(1, &vm_page_external_count);
        }
    }
    /*
     * It wouldn't make sense to insert a "reusable" page in
     * an object (the page would have been marked "reusable" only
     * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
     * in the object at that time).
     * But a page could be inserted in a "all_reusable" object, if
     * something faults it in (a vm_read() from another task or a
     * "use-after-free" issue in user space, for example).  It can
     * also happen if we're relocating a page from that object to
     * a different physical page during a physically-contiguous
     * allocation.
     */
    assert(!mem->reusable);
    if (object->all_reusable) {
        OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
    }

    if (object->purgable == VM_PURGABLE_DENY) {
        owner = TASK_NULL;
    } else {
        owner = object->vo_purgeable_owner;
    }
    if (owner &&
        (object->purgable == VM_PURGABLE_NONVOLATILE ||
         VM_PAGE_WIRED(mem))) {

        if (delayed_ledger_update)
            *delayed_ledger_update += PAGE_SIZE;
        else {
            /* more non-volatile bytes */
            ledger_credit(owner->ledger,
                      task_ledgers.purgeable_nonvolatile,
                      PAGE_SIZE);
            /* more footprint */
            ledger_credit(owner->ledger,
                      task_ledgers.phys_footprint,
                      PAGE_SIZE);
        }

    } else if (owner &&
           (object->purgable == VM_PURGABLE_VOLATILE ||
            object->purgable == VM_PURGABLE_EMPTY)) {
        assert(! VM_PAGE_WIRED(mem));
        /* more volatile bytes */
        ledger_credit(owner->ledger,
                  task_ledgers.purgeable_volatile,
                  PAGE_SIZE);
    }

    if (object->purgable == VM_PURGABLE_VOLATILE) {
        if (VM_PAGE_WIRED(mem)) {
            OSAddAtomic(+1, &vm_page_purgeable_wired_count);
        } else {
            OSAddAtomic(+1, &vm_page_purgeable_count);
        }
    } else if (object->purgable == VM_PURGABLE_EMPTY &&
           mem->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q) {
        /*
         * This page belongs to a purged VM object but hasn't
         * been purged (because it was "busy").
         * It's in the "throttled" queue and hence not
         * visible to vm_pageout_scan().  Move it to a pageable
         * queue, so that it can eventually be reclaimed, instead
         * of lingering in the "empty" object.
         */
        if (queues_lock_held == FALSE)
            vm_page_lockspin_queues();
        vm_page_deactivate(mem);
        if (queues_lock_held == FALSE)
            vm_page_unlock_queues();
    }
#if VM_OBJECT_TRACKING_OP_MODIFIED
    if (vm_object_tracking_inited &&
        object->resident_page_count == 0 &&
        object->pager == NULL &&
        object->shadow != NULL &&
        object->shadow->copy == object) {
        void    *bt[VM_OBJECT_TRACKING_BTDEPTH];
        int numsaved = 0;

        numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
        btlog_add_entry(vm_object_tracking_btlog,
                object,
                VM_OBJECT_TRACKING_OP_MODIFIED,
                bt,
                numsaved);
    }
#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
}
/*
 *  vm_page_replace:
 *
 *  Exactly like vm_page_insert, except that we first
 *  remove any existing page at the given offset in object.
 *
 *  The object must be locked.
 */
void
vm_page_replace(
    vm_page_t       mem,
    vm_object_t     object,
    vm_object_offset_t  offset)
{
    vm_page_bucket_t    *bucket;
    vm_page_t       found_m = VM_PAGE_NULL;
    lck_spin_t      *bucket_lock;
    int         hash_id;

#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(mem);
#endif
    vm_object_lock_assert_exclusive(object);
#if DEBUG || VM_PAGE_CHECK_BUCKETS
    if (mem->tabled || mem->vm_page_object)
        panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
              "already in (obj=%p,off=0x%llx)",
              mem, object, offset, VM_PAGE_OBJECT(mem), mem->offset);
#endif
    LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);

    assert(!VM_PAGE_PAGEABLE(mem));

    /*
     *  Record the object/offset pair in this page
     */
    mem->vm_page_object = VM_PAGE_PACK_OBJECT(object);
    mem->offset = offset;

    /*
     *  Insert it into the object_object/offset hash table,
     *  replacing any page that might have been there.
     */

    hash_id = vm_page_hash(object, offset);
    bucket = &vm_page_buckets[hash_id];
    bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

    lck_spin_lock(bucket_lock);

    if (bucket->page_list) {
        vm_page_packed_t *mp = &bucket->page_list;
        vm_page_t m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp));

        do {
            /*
             * compare packed object pointers
             */
            if (m->vm_page_object == mem->vm_page_object && m->offset == offset) {
                /*
                 * Remove old page from hash list
                 */
                *mp = m->next_m;
                m->hashed = FALSE;
                m->next_m = VM_PAGE_PACK_PTR(NULL);

                found_m = m;
                break;
            }
            mp = &m->next_m;
        } while ((m = (vm_page_t)(VM_PAGE_UNPACK_PTR(*mp))));

        mem->next_m = bucket->page_list;
    } else {
        mem->next_m = VM_PAGE_PACK_PTR(NULL);
    }
    /*
     * insert new page at head of hash list
     */
    bucket->page_list = VM_PAGE_PACK_PTR(mem);
    mem->hashed = TRUE;

    lck_spin_unlock(bucket_lock);

    if (found_m) {
        /*
         * there was already a page at the specified
         * offset for this object... remove it from
         * the object and free it back to the free list
         */
        vm_page_free_unlocked(found_m, FALSE);
    }
    vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
}
/*
 *  vm_page_remove:     [ internal use only ]
 *
 *  Removes the given mem entry from the object/offset-page
 *  table and the object page list.
 *
 *  The object must be locked.
 */

void
vm_page_remove(
    vm_page_t   mem,
    boolean_t   remove_from_hash)
{
    vm_page_bucket_t    *bucket;
    vm_page_t       this;
    lck_spin_t      *bucket_lock;
    int         hash_id;
    task_t          owner;
    vm_object_t     m_object;

    m_object = VM_PAGE_OBJECT(mem);

    XPR(XPR_VM_PAGE,
        "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
        m_object, mem->offset,
        mem, 0,0);

    vm_object_lock_assert_exclusive(m_object);
    assert(mem->tabled);
    assert(!mem->cleaning);
    assert(!mem->laundry);

    if (VM_PAGE_PAGEABLE(mem)) {
        LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
    }
#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(mem);
#endif
    if (remove_from_hash == TRUE) {
        /*
         *  Remove from the object_object/offset hash table
         */
        hash_id = vm_page_hash(m_object, mem->offset);
        bucket = &vm_page_buckets[hash_id];
        bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

        lck_spin_lock(bucket_lock);

        if ((this = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list))) == mem) {
            /* optimize for common case */

            bucket->page_list = mem->next_m;
        } else {
            vm_page_packed_t    *prev;

            for (prev = &this->next_m;
                 (this = (vm_page_t)(VM_PAGE_UNPACK_PTR(*prev))) != mem;
                 prev = &this->next_m)
                continue;
            *prev = this->next_m;
        }
#if MACH_PAGE_HASH_STATS
        bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */
        mem->hashed = FALSE;
        this->next_m = VM_PAGE_PACK_PTR(NULL);
        lck_spin_unlock(bucket_lock);
    }
    /*
     *  Now remove from the object's list of backed pages.
     */

    vm_page_remove_internal(mem);

    /*
     *  And show that the object has one fewer resident
     *  page.
     */

    assert(m_object->resident_page_count > 0);
    m_object->resident_page_count--;

    if (m_object->internal) {
        assert(vm_page_internal_count);

        OSAddAtomic(-1, &vm_page_internal_count);
    } else {
        assert(vm_page_external_count);
        OSAddAtomic(-1, &vm_page_external_count);

        if (mem->xpmapped) {
            assert(vm_page_xpmapped_external_count);
            OSAddAtomic(-1, &vm_page_xpmapped_external_count);
        }
    }
    if (!m_object->internal && (m_object->objq.next || m_object->objq.prev)) {
        if (m_object->resident_page_count == 0)
            vm_object_cache_remove(m_object);
    }

    if (VM_PAGE_WIRED(mem)) {
        assert(mem->wire_count > 0);
        VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
        VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
        VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
    }
    assert(m_object->resident_page_count >=
           m_object->wired_page_count);
    if (mem->reusable) {
        assert(m_object->reusable_page_count > 0);
        m_object->reusable_page_count--;
        assert(m_object->reusable_page_count <=
               m_object->resident_page_count);
        mem->reusable = FALSE;
        OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
        vm_page_stats_reusable.reused_remove++;
    } else if (m_object->all_reusable) {
        OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
        vm_page_stats_reusable.reused_remove++;
    }

    if (m_object->purgable == VM_PURGABLE_DENY) {
        owner = TASK_NULL;
    } else {
        owner = m_object->vo_purgeable_owner;
    }
    if (owner &&
        (m_object->purgable == VM_PURGABLE_NONVOLATILE ||
         VM_PAGE_WIRED(mem))) {
        /* less non-volatile bytes */
        ledger_debit(owner->ledger,
                 task_ledgers.purgeable_nonvolatile,
                 PAGE_SIZE);
        /* less footprint */
        ledger_debit(owner->ledger,
                 task_ledgers.phys_footprint,
                 PAGE_SIZE);
    } else if (owner &&
           (m_object->purgable == VM_PURGABLE_VOLATILE ||
            m_object->purgable == VM_PURGABLE_EMPTY)) {
        assert(! VM_PAGE_WIRED(mem));
        /* less volatile bytes */
        ledger_debit(owner->ledger,
                 task_ledgers.purgeable_volatile,
                 PAGE_SIZE);
    }
    if (m_object->purgable == VM_PURGABLE_VOLATILE) {
        if (VM_PAGE_WIRED(mem)) {
            assert(vm_page_purgeable_wired_count > 0);
            OSAddAtomic(-1, &vm_page_purgeable_wired_count);
        } else {
            assert(vm_page_purgeable_count > 0);
            OSAddAtomic(-1, &vm_page_purgeable_count);
        }
    }

    if (m_object->set_cache_attr == TRUE)
        pmap_set_cache_attributes(VM_PAGE_GET_PHYS_PAGE(mem), 0);

    mem->tabled = FALSE;
    mem->vm_page_object = 0;
    mem->offset = (vm_object_offset_t) -1;
}
/*
 *  vm_page_lookup:
 *
 *  Returns the page associated with the object/offset
 *  pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *  The object must be locked.  No side effects.
 */

#define VM_PAGE_HASH_LOOKUP_THRESHOLD   10
1940 #if DEBUG_VM_PAGE_LOOKUP
1944 uint64_t vpl_empty_obj
;
1945 uint64_t vpl_bucket_NULL
;
1946 uint64_t vpl_hit_hint
;
1947 uint64_t vpl_hit_hint_next
;
1948 uint64_t vpl_hit_hint_prev
;
1954 uint64_t vpl_fast_elapsed
;
1955 uint64_t vpl_slow_elapsed
;
1956 } vm_page_lookup_stats
__attribute__((aligned(8)));
1960 #define KDP_VM_PAGE_WALK_MAX 1000
1965 vm_object_offset_t offset
)
1968 int num_traversed
= 0;
1971 panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
1974 vm_page_queue_iterate(&object
->memq
, cur_page
, vm_page_t
, listq
) {
1975 if (cur_page
->offset
== offset
) {
1980 if (num_traversed
>= KDP_VM_PAGE_WALK_MAX
) {
1981 return VM_PAGE_NULL
;
1985 return VM_PAGE_NULL
;
vm_page_t
vm_page_lookup(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t		mem;
	vm_page_bucket_t	*bucket;
	vm_page_queue_entry_t	qe;
	lck_spin_t		*bucket_lock = NULL;
	int			hash_id;
#if DEBUG_VM_PAGE_LOOKUP
	uint64_t		start, elapsed;

	OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
#endif
	vm_object_lock_assert_held(object);

	if (object->resident_page_count == 0) {
#if DEBUG_VM_PAGE_LOOKUP
		OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
#endif
		return (VM_PAGE_NULL);
	}

	mem = object->memq_hint;

	if (mem != VM_PAGE_NULL) {
		assert(VM_PAGE_OBJECT(mem) == object);

		if (mem->offset == offset) {
#if DEBUG_VM_PAGE_LOOKUP
			OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
#endif
			return (mem);
		}
		qe = (vm_page_queue_entry_t)vm_page_queue_next(&mem->listq);

		if (! vm_page_queue_end(&object->memq, qe)) {
			vm_page_t	next_page;

			next_page = (vm_page_t)((uintptr_t)qe);
			assert(VM_PAGE_OBJECT(next_page) == object);

			if (next_page->offset == offset) {
				object->memq_hint = next_page; /* new hint */
#if DEBUG_VM_PAGE_LOOKUP
				OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
#endif
				return (next_page);
			}
		}
		qe = (vm_page_queue_entry_t)vm_page_queue_prev(&mem->listq);

		if (! vm_page_queue_end(&object->memq, qe)) {
			vm_page_t	prev_page;

			prev_page = (vm_page_t)((uintptr_t)qe);
			assert(VM_PAGE_OBJECT(prev_page) == object);

			if (prev_page->offset == offset) {
				object->memq_hint = prev_page; /* new hint */
#if DEBUG_VM_PAGE_LOOKUP
				OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
#endif
				return (prev_page);
			}
		}
	}
	/*
	 * Search the hash table for this object/offset pair
	 */
	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];

	/*
	 * since we hold the object lock, we are guaranteed that no
	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
	 */
	if (!bucket->page_list) {
#if DEBUG_VM_PAGE_LOOKUP
		OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
#endif
		return (VM_PAGE_NULL);
	}

#if DEBUG_VM_PAGE_LOOKUP
	start = mach_absolute_time();
#endif
	if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
		/*
		 * on average, it's roughly 3 times faster to run a short memq list
		 * than to take the spin lock and go through the hash list
		 */
		mem = (vm_page_t)vm_page_queue_first(&object->memq);

		while (!vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem)) {

			if (mem->offset == offset)
				break;

			mem = (vm_page_t)vm_page_queue_next(&mem->listq);
		}
		if (vm_page_queue_end(&object->memq, (vm_page_queue_entry_t)mem))
			mem = NULL;
	} else {
		vm_page_object_t	packed_object;

		packed_object = VM_PAGE_PACK_OBJECT(object);

		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		for (mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(bucket->page_list));
		     mem != VM_PAGE_NULL;
		     mem = (vm_page_t)(VM_PAGE_UNPACK_PTR(mem->next_m))) {
			/*
			 * we don't hold the page queue lock
			 * so this check isn't safe to make
			 */
			if ((mem->vm_page_object == packed_object) && (mem->offset == offset))
				break;
		}
		lck_spin_unlock(bucket_lock);
	}

#if DEBUG_VM_PAGE_LOOKUP
	elapsed = mach_absolute_time() - start;

	if (bucket_lock) {
		OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
		OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
	} else {
		OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
		OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
	}
	if (mem != VM_PAGE_NULL)
		OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
	else
		OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
#endif
	if (mem != VM_PAGE_NULL) {
		assert(VM_PAGE_OBJECT(mem) == object);

		object->memq_hint = mem;
	}
	return (mem);
}
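/*
 * Illustrative usage sketch (not part of the original file): the normal
 * pattern is to take the object lock before calling vm_page_lookup(), since
 * the function relies on that lock both for the memq hint and for the
 * lock-free "empty bucket" shortcut above.  The helper name
 * example_object_has_page() is hypothetical; the sketch is kept under
 * "#if 0" so it is never built.
 */
#if 0	/* sketch only, not built */
static boolean_t
example_object_has_page(vm_object_t object, vm_object_offset_t offset)
{
	vm_page_t	m;

	vm_object_lock(object);		/* vm_page_lookup() asserts this lock is held */
	m = vm_page_lookup(object, offset);
	vm_object_unlock(object);

	return (m != VM_PAGE_NULL);
}
#endif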
/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(
	vm_page_t		mem,
	vm_object_t		new_object,
	vm_object_offset_t	new_offset)
{
	boolean_t	internal_to_external, external_to_internal;
	vm_tag_t	tag;
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(mem);

	assert(m_object != new_object);
	assert(m_object);

	XPR(XPR_VM_PAGE,
	    "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
	    new_object, new_offset, mem, 0, 0);

	/*
	 *	Changes to mem->object require the page lock because
	 *	the pageout daemon uses that lock to get the object.
	 */
	vm_page_lockspin_queues();

	internal_to_external = FALSE;
	external_to_internal = FALSE;

	if (mem->vm_page_q_state == VM_PAGE_ON_ACTIVE_LOCAL_Q) {
		/*
		 * it's much easier to get the vm_page_pageable_xxx accounting correct
		 * if we first move the page to the active queue... it's going to end
		 * up there anyway, and we don't do vm_page_rename's frequently enough
		 * for this to matter.
		 */
		vm_page_queues_remove(mem, FALSE);
		vm_page_activate(mem);
	}
	if (VM_PAGE_PAGEABLE(mem)) {
		if (m_object->internal && !new_object->internal) {
			internal_to_external = TRUE;
		}
		if (!m_object->internal && new_object->internal) {
			external_to_internal = TRUE;
		}
	}

	tag = m_object->wire_tag;
	vm_page_remove(mem, TRUE);
	vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);

	if (internal_to_external) {
		vm_page_pageable_internal_count--;
		vm_page_pageable_external_count++;
	} else if (external_to_internal) {
		vm_page_pageable_external_count--;
		vm_page_pageable_internal_count++;
	}

	vm_page_unlock_queues();
}

/*
 *	vm_page_init:
 *
 *	Initialize the fields in a new page.
 *	This takes a structure with random values and initializes it
 *	so that it can be given to vm_page_release or vm_page_insert.
 */
void
vm_page_init(
	vm_page_t	mem,
	ppnum_t		phys_page,
	boolean_t	lopage)
{
#if	DEBUG
	if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
		if (!(pmap_valid_page(phys_page))) {
			panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
		}
	}
#endif
	*mem = vm_page_template;

	VM_PAGE_SET_PHYS_PAGE(mem, phys_page);
#if 0
	/*
	 * we're leaving this turned off for now... currently pages
	 * come off the free list and are either immediately dirtied/referenced
	 * due to zero-fill or COW faults, or are used to read or write files...
	 * in the file I/O case, the UPL mechanism takes care of clearing
	 * the state of the HW ref/mod bits in a somewhat fragile way.
	 * Since we may change the way this works in the future (to toughen it up),
	 * I'm leaving this as a reminder of where these bits could get cleared
	 */

	/*
	 * make sure both the h/w referenced and modified bits are
	 * clear at this point... we are especially dependent on
	 * not finding a 'stale' h/w modified in a number of spots
	 * once this page goes back into use
	 */
	pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
#endif
	mem->lopage = lopage;
}
/*
 *	vm_page_grab_fictitious:
 *
 *	Remove a fictitious page from the free list.
 *	Returns VM_PAGE_NULL if there are no free pages.
 */
int	c_vm_page_grab_fictitious = 0;
int	c_vm_page_grab_fictitious_failed = 0;
int	c_vm_page_release_fictitious = 0;
int	c_vm_page_more_fictitious = 0;

vm_page_t
vm_page_grab_fictitious_common(
	ppnum_t phys_addr)
{
	vm_page_t	m;

	if ((m = (vm_page_t)zget(vm_page_zone))) {

		vm_page_init(m, phys_addr, FALSE);
		m->fictitious = TRUE;

		c_vm_page_grab_fictitious++;
	} else
		c_vm_page_grab_fictitious_failed++;

	return m;
}

vm_page_t
vm_page_grab_fictitious(void)
{
	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
}

int vm_guard_count;

vm_page_t
vm_page_grab_guard(void)
{
	vm_page_t page;
	page = vm_page_grab_fictitious_common(vm_page_guard_addr);
	if (page) OSAddAtomic(1, &vm_guard_count);
	return page;
}


/*
 *	vm_page_release_fictitious:
 *
 *	Release a fictitious page to the zone pool
 */
void
vm_page_release_fictitious(
	vm_page_t m)
{
	assert((m->vm_page_q_state == VM_PAGE_NOT_ON_Q) || (m->vm_page_q_state == VM_PAGE_IS_WIRED));
	assert(m->fictitious);
	assert(VM_PAGE_GET_PHYS_PAGE(m) == vm_page_fictitious_addr ||
	       VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr);

	if (VM_PAGE_GET_PHYS_PAGE(m) == vm_page_guard_addr) OSAddAtomic(-1, &vm_guard_count);

	c_vm_page_release_fictitious++;

	zfree(vm_page_zone, m);
}

/*
 *	vm_page_more_fictitious:
 *
 *	Add more fictitious pages to the zone.
 *	Allowed to block. This routine is way intimate
 *	with the zones code, for several reasons:
 *	1. we need to carve some page structures out of physical
 *	   memory before zones work, so they _cannot_ come from
 *	   the zone_map.
 *	2. the zone needs to be collectable in order to prevent
 *	   growth without bound. These structures are used by
 *	   the device pager (by the hundreds and thousands), as
 *	   private pages for pageout, and as blocking pages for
 *	   pagein. Temporary bursts in demand should not result in
 *	   permanent allocation of a resource.
 *	3. To smooth allocation humps, we allocate single pages
 *	   with kernel_memory_allocate(), and cram them into the
 *	   zone.
 */

void vm_page_more_fictitious(void)
{
	vm_offset_t	addr;
	kern_return_t	retval;

	c_vm_page_more_fictitious++;

	/*
	 * Allocate a single page from the zone_map. Do not wait if no physical
	 * pages are immediately available, and do not zero the space. We need
	 * our own blocking lock here to prevent having multiple,
	 * simultaneous requests from piling up on the zone_map lock. Exactly
	 * one (of our) threads should be potentially waiting on the map lock.
	 * If winner is not vm-privileged, then the page allocation will fail,
	 * and it will temporarily block here in the vm_page_wait().
	 */
	lck_mtx_lock(&vm_page_alloc_lock);
	/*
	 * If another thread allocated space, just bail out now.
	 */
	if (zone_free_count(vm_page_zone) > 5) {
		/*
		 * The number "5" is a small number that is larger than the
		 * number of fictitious pages that any single caller will
		 * attempt to allocate. Otherwise, a thread will attempt to
		 * acquire a fictitious page (vm_page_grab_fictitious), fail,
		 * release all of the resources and locks already acquired,
		 * and then call this routine. This routine finds the pages
		 * that the caller released, so fails to allocate new space.
		 * The process repeats infinitely. The largest known number
		 * of fictitious pages required in this manner is 2. 5 is
		 * simply a somewhat larger number.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		return;
	}

	retval = kernel_memory_allocate(zone_map,
					&addr, PAGE_SIZE, 0,
					KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
	if (retval != KERN_SUCCESS) {
		/*
		 * No page was available. Drop the
		 * lock to give another thread a chance at it, and
		 * wait for the pageout daemon to make progress.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		vm_page_wait(THREAD_UNINT);
		return;
	}

	zcram(vm_page_zone, addr, PAGE_SIZE);

	lck_mtx_unlock(&vm_page_alloc_lock);
}
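/*
 * Illustrative sketch (not part of the original file): the "check the zone's
 * free count again after taking vm_page_alloc_lock" step above is an instance
 * of a general refill pattern, where several threads may race to replenish a
 * shared cache and only the first one through the lock should do the work.
 * The names example_refill_lock, example_cache_level, EXAMPLE_REFILL_THRESHOLD
 * and example_allocate_and_add_to_cache are hypothetical; the sketch is kept
 * under "#if 0" so it is never built.
 */
#if 0	/* sketch only, not built */
static void
example_cache_refill(void)
{
	lck_mtx_lock(&example_refill_lock);
	/*
	 * If another thread already refilled the cache while we were
	 * waiting for the lock, bail out instead of over-allocating.
	 */
	if (example_cache_level() > EXAMPLE_REFILL_THRESHOLD) {
		lck_mtx_unlock(&example_refill_lock);
		return;
	}
	example_allocate_and_add_to_cache();
	lck_mtx_unlock(&example_refill_lock);
}
#endif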
/*
 *	vm_pool_low():
 *
 *	Return true if it is not likely that a non-vm_privileged thread
 *	can get memory without blocking.  Advisory only, since the
 *	situation may change under us.
 */
int
vm_pool_low(void)
{
	/* No locking, at worst we will fib. */
	return( vm_page_free_count <= vm_page_free_reserved );
}


#if CONFIG_BACKGROUND_QUEUE

void
vm_page_update_background_state(vm_page_t mem)
{
	if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
		return;

	if (mem->vm_page_in_background == FALSE)
		return;

#if BACKGROUNDQ_BASED_ON_QOS
	if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
		return;
#else
	task_t	my_task;

	my_task = current_task();

	if (my_task) {
		if (proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG))
			return;
	}
#endif
	vm_page_lockspin_queues();

	mem->vm_page_in_background = FALSE;
	vm_page_background_promoted_count++;

	vm_page_remove_from_backgroundq(mem);

	vm_page_unlock_queues();
}


void
vm_page_assign_background_state(vm_page_t mem)
{
	if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
		return;

#if BACKGROUNDQ_BASED_ON_QOS
	if (proc_get_effective_thread_policy(current_thread(), TASK_POLICY_QOS) <= THREAD_QOS_LEGACY)
		mem->vm_page_in_background = TRUE;
	else
		mem->vm_page_in_background = FALSE;
#else
	task_t	my_task;

	my_task = current_task();

	if (my_task)
		mem->vm_page_in_background = proc_get_effective_task_policy(my_task, TASK_POLICY_DARWIN_BG);
#endif
}


void
vm_page_remove_from_backgroundq(
	vm_page_t	mem)
{
	vm_object_t	m_object;

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if (mem->vm_page_on_backgroundq) {
		vm_page_queue_remove(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);

		mem->vm_page_backgroundq.next = 0;
		mem->vm_page_backgroundq.prev = 0;
		mem->vm_page_on_backgroundq = FALSE;

		vm_page_background_count--;

		m_object = VM_PAGE_OBJECT(mem);

		if (m_object->internal)
			vm_page_background_internal_count--;
		else
			vm_page_background_external_count--;
	} else {
		assert(VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.next) == (uintptr_t)NULL &&
		       VM_PAGE_UNPACK_PTR(mem->vm_page_backgroundq.prev) == (uintptr_t)NULL);
	}
}


void
vm_page_add_to_backgroundq(
	vm_page_t	mem,
	boolean_t	first)
{
	vm_object_t	m_object;

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if (vm_page_background_mode == VM_PAGE_BG_DISABLED)
		return;

	if (mem->vm_page_on_backgroundq == FALSE) {

		m_object = VM_PAGE_OBJECT(mem);

		if (vm_page_background_exclude_external && !m_object->internal)
			return;

		if (first == TRUE)
			vm_page_queue_enter_first(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
		else
			vm_page_queue_enter(&vm_page_queue_background, mem, vm_page_t, vm_page_backgroundq);
		mem->vm_page_on_backgroundq = TRUE;

		vm_page_background_count++;

		if (m_object->internal)
			vm_page_background_internal_count++;
		else
			vm_page_background_external_count++;
	}
}

#endif /* CONFIG_BACKGROUND_QUEUE */
/*
 * this is an interface to support bring-up of drivers
 * on platforms with physical memory > 4G...
 */
int		vm_himemory_mode = 2;

/*
 * this interface exists to support hardware controllers
 * incapable of generating DMAs with more than 32 bits
 * of address on platforms with physical memory > 4G...
 */
unsigned int	vm_lopages_allocated_q = 0;
unsigned int	vm_lopages_allocated_cpm_success = 0;
unsigned int	vm_lopages_allocated_cpm_failed = 0;
vm_page_queue_head_t	vm_lopage_queue_free __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
vm_page_t
vm_page_grablo(void)
{
	vm_page_t	mem;

	if (vm_lopage_needed == FALSE)
		return (vm_page_grab());

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if ( !vm_page_queue_empty(&vm_lopage_queue_free)) {
		vm_page_queue_remove_first(&vm_lopage_queue_free, mem, vm_page_t, pageq);
		assert(vm_lopage_free_count);
		assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOPAGE_Q);
		mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;

		vm_lopage_free_count--;
		vm_lopages_allocated_q++;

		if (vm_lopage_free_count < vm_lopage_lowater)
			vm_lopage_refill = TRUE;

		lck_mtx_unlock(&vm_page_queue_free_lock);

#if CONFIG_BACKGROUND_QUEUE
		vm_page_assign_background_state(mem);
#endif
	} else {
		lck_mtx_unlock(&vm_page_queue_free_lock);

		if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {

			lck_mtx_lock_spin(&vm_page_queue_free_lock);
			vm_lopages_allocated_cpm_failed++;
			lck_mtx_unlock(&vm_page_queue_free_lock);

			return (VM_PAGE_NULL);
		}
		assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);

		vm_page_lockspin_queues();

		mem->gobbled = FALSE;
		vm_page_gobble_count--;
		vm_page_wire_count--;

		vm_lopages_allocated_cpm_success++;
		vm_page_unlock_queues();
	}
	assert(!mem->pmapped);
	assert(!mem->wpmapped);
	assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));

	VM_PAGE_ZERO_PAGEQ_ENTRY(mem);

	return (mem);
}


/*
 *	first try to grab a page from the per-cpu free list...
 *	this must be done while pre-emption is disabled... if
 *	a page is available, we're done...
 *	if no page is available, grab the vm_page_queue_free_lock
 *	and see if current number of free pages would allow us
 *	to grab at least 1... if not, return VM_PAGE_NULL as before...
 *	if there are pages available, disable preemption and
 *	recheck the state of the per-cpu free list... we could
 *	have been preempted and moved to a different cpu, or
 *	some other thread could have re-filled it... if still
 *	empty, figure out how many pages we can steal from the
 *	global free queue and move to the per-cpu queue...
 *	return 1 of these pages when done... only wakeup the
 *	pageout_scan thread if we moved pages from the global
 *	list... no need for the wakeup if we've satisfied the
 *	request from the per-cpu queue.
 */

#if CONFIG_SECLUDED_MEMORY
vm_page_t vm_page_grab_secluded(void);
#endif /* CONFIG_SECLUDED_MEMORY */

vm_page_t
vm_page_grab(void)
{
	return vm_page_grab_options(0);
}

#if HIBERNATION
boolean_t	hibernate_rebuild_needed = FALSE;
#endif /* HIBERNATION */
vm_page_t
vm_page_grab_options(
	int grab_options)
{
	vm_page_t	mem;

	disable_preemption();

	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
return_page_from_cpu_list:
		assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);

#if HIBERNATION
		if (hibernate_rebuild_needed) {
			panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
		}
#endif /* HIBERNATION */
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		PROCESSOR_DATA(current_processor(), free_pages) = mem->snext;

		enable_preemption();
		VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
		mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;

		assert(mem->listq.next == 0 && mem->listq.prev == 0);
		assert(mem->tabled == FALSE);
		assert(mem->vm_page_object == 0);
		assert(!mem->laundry);
		assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
		assert(!mem->pmapped);
		assert(!mem->wpmapped);
		assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));

#if CONFIG_BACKGROUND_QUEUE
		vm_page_assign_background_state(mem);
#endif
		return mem;
	}
	enable_preemption();

	/*
	 *	Optionally produce warnings if the wire or gobble
	 *	counts exceed some threshold.
	 */
#if VM_PAGE_WIRE_COUNT_WARNING
	if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
		printf("mk: vm_page_grab(): high wired page count of %d\n",
			vm_page_wire_count);
	}
#endif
#if VM_PAGE_GOBBLE_COUNT_WARNING
	if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
		printf("mk: vm_page_grab(): high gobbled page count of %d\n",
			vm_page_gobble_count);
	}
#endif

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	/*
	 *	Only let privileged threads (involved in pageout)
	 *	dip into the reserved pool.
	 */
	if ((vm_page_free_count < vm_page_free_reserved) &&
	    !(current_thread()->options & TH_OPT_VMPRIV)) {
		/* no page for us in the free queue... */
		lck_mtx_unlock(&vm_page_queue_free_lock);
		mem = VM_PAGE_NULL;

#if CONFIG_SECLUDED_MEMORY
		/* ... but can we try and grab from the secluded queue? */
		if (vm_page_secluded_count > 0 &&
		    ((grab_options & VM_PAGE_GRAB_SECLUDED) ||
		     task_can_use_secluded_mem(current_task()))) {
			mem = vm_page_grab_secluded();
			if (grab_options & VM_PAGE_GRAB_SECLUDED) {
				vm_page_secluded.grab_for_iokit++;
				if (mem) {
					vm_page_secluded.grab_for_iokit_success++;
				}
			}
			if (mem) {
				VM_CHECK_MEMORYSTATUS;
				return mem;
			}
		}
#else /* CONFIG_SECLUDED_MEMORY */
		(void) grab_options;
#endif /* CONFIG_SECLUDED_MEMORY */
		return VM_PAGE_NULL;
	} else {
		vm_page_t	head;
		vm_page_t	tail;
		unsigned int	pages_to_steal;
		unsigned int	color;
		unsigned int	clump_end, sub_count;

		while ( vm_page_free_count == 0 ) {

			lck_mtx_unlock(&vm_page_queue_free_lock);
			/*
			 * must be a privileged thread to be
			 * in this state since a non-privileged
			 * thread would have bailed if we were
			 * under the vm_page_free_reserved mark
			 */
			VM_PAGE_WAIT();
			lck_mtx_lock_spin(&vm_page_queue_free_lock);
		}

		disable_preemption();

		if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
			lck_mtx_unlock(&vm_page_queue_free_lock);

			/*
			 * we got preempted and moved to another processor
			 * or we got preempted and someone else ran and filled the cache
			 */
			goto return_page_from_cpu_list;
		}
		if (vm_page_free_count <= vm_page_free_reserved)
			pages_to_steal = 1;
		else {
			if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
				pages_to_steal = vm_free_magazine_refill_limit;
			else
				pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
		}
		color = PROCESSOR_DATA(current_processor(), start_color);
		head = tail = NULL;

		vm_page_free_count -= pages_to_steal;
		clump_end = sub_count = 0;

		while (pages_to_steal--) {

			while (vm_page_queue_empty(&vm_page_queue_free[color].qhead))
				color = (color + 1) & vm_color_mask;
#if defined(__x86_64__)
			vm_page_queue_remove_first_with_clump(&vm_page_queue_free[color].qhead,
							      mem, vm_page_t, pageq, clump_end);
#else
			vm_page_queue_remove_first(&vm_page_queue_free[color].qhead,
						   mem, vm_page_t, pageq);
#endif
			assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_Q);

			VM_PAGE_ZERO_PAGEQ_ENTRY(mem);

#if defined(__arm__) || defined(__arm64__)
			color = (color + 1) & vm_color_mask;
#else

#if DEVELOPMENT || DEBUG

			sub_count++;
			if (clump_end) {
				vm_clump_update_stats(sub_count);
				sub_count = 0;
				color = (color + 1) & vm_color_mask;
			}
#else
			if (clump_end) color = (color + 1) & vm_color_mask;

#endif /* if DEVELOPMENT || DEBUG */

#endif /* if defined(__arm__) || defined(__arm64__) */

			if (head == NULL)
				head = mem;
			else
				tail->snext = mem;
			tail = mem;

			assert(mem->listq.next == 0 && mem->listq.prev == 0);
			assert(mem->tabled == FALSE);
			assert(mem->vm_page_object == 0);
			assert(!mem->laundry);

			mem->vm_page_q_state = VM_PAGE_ON_FREE_LOCAL_Q;

			assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
			assert(!mem->pmapped);
			assert(!mem->wpmapped);
			assert(!pmap_is_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem)));
		}
#if defined (__x86_64__) && (DEVELOPMENT || DEBUG)
		vm_clump_update_stats(sub_count);
#endif
		lck_mtx_unlock(&vm_page_queue_free_lock);

#if HIBERNATION
		if (hibernate_rebuild_needed) {
			panic("%s:%d should not modify cpu->free_pages while hibernating", __FUNCTION__, __LINE__);
		}
#endif /* HIBERNATION */
		PROCESSOR_DATA(current_processor(), free_pages) = head->snext;
		PROCESSOR_DATA(current_processor(), start_color) = color;

		/*
		 * satisfy this request
		 */
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		mem = head;
		assert(mem->vm_page_q_state == VM_PAGE_ON_FREE_LOCAL_Q);

		VM_PAGE_ZERO_PAGEQ_ENTRY(mem);
		mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;

		enable_preemption();
	}
	/*
	 *	Decide if we should poke the pageout daemon.
	 *	We do this if the free count is less than the low
	 *	water mark, or if the free count is less than the high
	 *	water mark (but above the low water mark) and the inactive
	 *	count is less than its target.
	 *
	 *	We don't have the counts locked ... if they change a little,
	 *	it doesn't really matter.
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

//	dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 4);	/* (TEST/DEBUG) */

#if CONFIG_BACKGROUND_QUEUE
	vm_page_assign_background_state(mem);
#endif
	return mem;
}
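/*
 * Illustrative sketch (not part of the original file): the per-cpu free list
 * strategy described in the comment above vm_page_grab() boils down to
 * "consume from a preemption-disabled per-cpu magazine, and only take the
 * global free-queue lock when the magazine needs refilling".  The names
 * example_grab_from_cpu_magazine, example_refill_from_global_queue and
 * EXAMPLE_MAGAZINE_MAX are hypothetical, and the refill/retry handling is
 * deliberately simplified; the sketch is kept under "#if 0" so it is never
 * built.
 */
#if 0	/* sketch only, not built */
static vm_page_t
example_grab_from_cpu_magazine(void)
{
	vm_page_t	m;

	disable_preemption();
	if ((m = PROCESSOR_DATA(current_processor(), free_pages))) {
		/* fast path: no global lock needed */
		PROCESSOR_DATA(current_processor(), free_pages) = m->snext;
		enable_preemption();
		return m;
	}
	enable_preemption();

	/* slow path: refill the magazine under the global lock, then retry */
	example_refill_from_global_queue(EXAMPLE_MAGAZINE_MAX);
	return example_grab_from_cpu_magazine();
}
#endif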
#if CONFIG_SECLUDED_MEMORY
vm_page_t
vm_page_grab_secluded(void)
{
	vm_page_t	mem;
	vm_object_t	object;
	int		refmod_state;

	if (vm_page_secluded_count == 0) {
		/* no secluded pages to grab... */
		return VM_PAGE_NULL;
	}

	/* secluded queue is protected by the VM page queue lock */
	vm_page_lock_queues();

	if (vm_page_secluded_count == 0) {
		/* no secluded pages to grab... */
		vm_page_unlock_queues();
		return VM_PAGE_NULL;
	}

	/* can we grab from the secluded queue? */
	if (vm_page_secluded_count > vm_page_secluded_target ||
	    (vm_page_secluded_count > 0 &&
	     task_can_use_secluded_mem(current_task()))) {
		/* OK */
	} else {
		/* can't grab from secluded queue... */
		vm_page_unlock_queues();
		return VM_PAGE_NULL;
	}

	/* we can grab a page from secluded queue! */
	assert((vm_page_secluded_count_free +
		vm_page_secluded_count_inuse) ==
	       vm_page_secluded_count);
	if (current_task()->task_can_use_secluded_mem) {
		assert(num_tasks_can_use_secluded_mem > 0);
	}
	assert(!vm_page_queue_empty(&vm_page_queue_secluded));
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	mem = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
	assert(mem->vm_page_q_state == VM_PAGE_ON_SECLUDED_Q);
	vm_page_queues_remove(mem, TRUE);

	object = VM_PAGE_OBJECT(mem);

	assert(!mem->fictitious);
	assert(!VM_PAGE_WIRED(mem));
	if (object == VM_OBJECT_NULL) {
		/* free for grab! */
		vm_page_unlock_queues();
		vm_page_secluded.grab_success_free++;

		assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
		assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
		assert(mem->pageq.next == 0);
		assert(mem->pageq.prev == 0);
		assert(mem->listq.next == 0);
		assert(mem->listq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
		assert(mem->vm_page_on_backgroundq == 0);
		assert(mem->vm_page_backgroundq.next == 0);
		assert(mem->vm_page_backgroundq.prev == 0);
#endif /* CONFIG_BACKGROUND_QUEUE */
		return mem;
	}

	assert(!object->internal);
//	vm_page_pageable_external_count--;

	if (!vm_object_lock_try(object)) {
//		printf("SECLUDED: page %p: object %p locked\n", mem, object);
		vm_page_secluded.grab_failure_locked++;
	reactivate_secluded_page:
		vm_page_activate(mem);
		vm_page_unlock_queues();
		return VM_PAGE_NULL;
	}
	if (mem->busy || mem->cleaning || mem->laundry) {
		/* can't steal page in this state... */
		vm_object_unlock(object);
		vm_page_secluded.grab_failure_state++;
		goto reactivate_secluded_page;
	}

	mem->busy = TRUE;
	refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(mem));
	if (refmod_state & VM_MEM_REFERENCED) {
		mem->reference = TRUE;
	}
	if (refmod_state & VM_MEM_MODIFIED) {
		SET_PAGE_DIRTY(mem, FALSE);
	}
	if (mem->dirty || mem->precious) {
		/* can't grab a dirty page; re-activate */
//		printf("SECLUDED: dirty page %p\n", mem);
		PAGE_WAKEUP_DONE(mem);
		vm_page_secluded.grab_failure_dirty++;
		vm_object_unlock(object);
		goto reactivate_secluded_page;
	}
	if (mem->reference) {
		/* it's been used but we do need to grab a page... */
	}

	vm_page_unlock_queues();

	/* finish what vm_page_free() would have done... */
	vm_page_free_prepare_object(mem, TRUE);
	vm_object_unlock(object);
	object = VM_OBJECT_NULL;
	if (vm_page_free_verify) {
		assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
	}
	pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
	vm_page_secluded.grab_success_other++;

	assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);
	assert(VM_PAGE_OBJECT(mem) == VM_OBJECT_NULL);
	assert(mem->pageq.next == 0);
	assert(mem->pageq.prev == 0);
	assert(mem->listq.next == 0);
	assert(mem->listq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
	assert(mem->vm_page_on_backgroundq == 0);
	assert(mem->vm_page_backgroundq.next == 0);
	assert(mem->vm_page_backgroundq.prev == 0);
#endif /* CONFIG_BACKGROUND_QUEUE */

	return mem;
}
#endif /* CONFIG_SECLUDED_MEMORY */
/*
 *	vm_page_release:
 *
 *	Return a page to the free list.
 */
void
vm_page_release(
	vm_page_t	mem,
	boolean_t	page_queues_locked)
{
	unsigned int	color;
	int		need_wakeup = 0;
	int		need_priv_wakeup = 0;
#if CONFIG_SECLUDED_MEMORY
	int		need_secluded_wakeup = 0;
#endif /* CONFIG_SECLUDED_MEMORY */

	if (page_queues_locked) {
		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	} else {
		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
	}

	assert(!mem->private && !mem->fictitious);
	if (vm_page_free_verify) {
		assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
	}
//	dbgLog(VM_PAGE_GET_PHYS_PAGE(mem), vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */

	pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);

	assert(!mem->laundry);
	assert(mem->vm_page_object == 0);
	assert(mem->pageq.next == 0 && mem->pageq.prev == 0);
	assert(mem->listq.next == 0 && mem->listq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
	assert(mem->vm_page_backgroundq.next == 0 &&
	       mem->vm_page_backgroundq.prev == 0 &&
	       mem->vm_page_on_backgroundq == FALSE);
#endif
	if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
	    vm_lopage_free_count < vm_lopage_free_limit &&
	    VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
		/*
		 * this exists to support hardware controllers
		 * incapable of generating DMAs with more than 32 bits
		 * of address on platforms with physical memory > 4G...
		 */
		vm_page_queue_enter_first(&vm_lopage_queue_free, mem, vm_page_t, pageq);
		vm_lopage_free_count++;

		if (vm_lopage_free_count >= vm_lopage_free_limit)
			vm_lopage_refill = FALSE;

		mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
#if CONFIG_SECLUDED_MEMORY
	} else if (vm_page_free_count > vm_page_free_reserved &&
		   vm_page_secluded_count < vm_page_secluded_target &&
		   num_tasks_can_use_secluded_mem == 0) {
		/*
		 * XXX FBDP TODO: also avoid refilling secluded queue
		 * when some IOKit objects are already grabbing from it...
		 */
		if (!page_queues_locked) {
			if (!vm_page_trylock_queues()) {
				/* take locks in right order */
				lck_mtx_unlock(&vm_page_queue_free_lock);
				vm_page_lock_queues();
				lck_mtx_lock_spin(&vm_page_queue_free_lock);
			}
		}
		mem->lopage = FALSE;
		LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
		vm_page_queue_enter_first(&vm_page_queue_secluded, mem, vm_page_t, pageq);
		mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
		vm_page_secluded_count++;
		vm_page_secluded_count_free++;
		if (!page_queues_locked) {
			vm_page_unlock_queues();
		}
		LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_OWNED);
		if (vm_page_free_wanted_secluded > 0) {
			vm_page_free_wanted_secluded--;
			need_secluded_wakeup = 1;
		}
#endif /* CONFIG_SECLUDED_MEMORY */
	} else {
		mem->lopage = FALSE;
		mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;

		color = VM_PAGE_GET_COLOR(mem);
#if defined(__x86_64__)
		vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead, mem, vm_page_t, pageq);
#else
		vm_page_queue_enter(&vm_page_queue_free[color].qhead, mem, vm_page_t, pageq);
#endif
		vm_page_free_count++;
		/*
		 *	Check if we should wake up someone waiting for page.
		 *	But don't bother waking them unless they can allocate.
		 *
		 *	We wakeup only one thread, to prevent starvation.
		 *	Because the scheduling system handles wait queues FIFO,
		 *	if we wakeup all waiting threads, one greedy thread
		 *	can starve multiple niceguy threads.  When the threads
		 *	all wakeup, the greedy thread runs first, grabs the page,
		 *	and waits for another page.  It will be the first to run
		 *	when the next page is freed.
		 *
		 *	However, there is a slight danger here.
		 *	The thread we wake might not use the free page.
		 *	Then the other threads could wait indefinitely
		 *	while the page goes unused.  To forestall this,
		 *	the pageout daemon will keep making free pages
		 *	as long as vm_page_free_wanted is non-zero.
		 */

		assert(vm_page_free_count > 0);
		if (vm_page_free_wanted_privileged > 0) {
			vm_page_free_wanted_privileged--;
			need_priv_wakeup = 1;
#if CONFIG_SECLUDED_MEMORY
		} else if (vm_page_free_wanted_secluded > 0 &&
			   vm_page_free_count > vm_page_free_reserved) {
			vm_page_free_wanted_secluded--;
			need_secluded_wakeup = 1;
#endif /* CONFIG_SECLUDED_MEMORY */
		} else if (vm_page_free_wanted > 0 &&
			   vm_page_free_count > vm_page_free_reserved) {
			vm_page_free_wanted--;
			need_wakeup = 1;
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);

	if (need_priv_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
#if CONFIG_SECLUDED_MEMORY
	else if (need_secluded_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_wanted_secluded);
#endif /* CONFIG_SECLUDED_MEMORY */
	else if (need_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_count);

	VM_CHECK_MEMORYSTATUS;
}
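/*
 * Illustrative sketch (not part of the original file): the wakeup policy in
 * vm_page_release() wakes exactly one waiter per freed page
 * (thread_wakeup_one) instead of broadcasting, which is what keeps one greedy
 * thread from starving the others, as the comment above explains.  The names
 * example_resource_lock, example_resource_count, example_resource_waiters and
 * example_resource_available are hypothetical; the sketch is kept under
 * "#if 0" so it is never built.
 */
#if 0	/* sketch only, not built */
static void
example_release_one_resource(void)
{
	lck_mtx_lock_spin(&example_resource_lock);
	example_resource_count++;
	if (example_resource_waiters > 0)
		example_resource_waiters--;
	lck_mtx_unlock(&example_resource_lock);

	/* wake a single waiter; the FIFO wait queue hands the resource out fairly */
	thread_wakeup_one((event_t) &example_resource_available);
}
#endif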
/*
 * This version of vm_page_release() is used only at startup
 * when we are single-threaded and pages are being released
 * for the first time. Hence, no locking or unnecessary checks are made.
 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
 */
void
vm_page_release_startup(
	vm_page_t	mem)
{
	vm_page_queue_t	queue_free;

	if (vm_lopage_free_count < vm_lopage_free_limit &&
	    VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
		mem->lopage = TRUE;
		mem->vm_page_q_state = VM_PAGE_ON_FREE_LOPAGE_Q;
		vm_lopage_free_count++;
		queue_free = &vm_lopage_queue_free;
#if CONFIG_SECLUDED_MEMORY
	} else if (vm_page_secluded_count < vm_page_secluded_target) {
		mem->lopage = FALSE;
		mem->vm_page_q_state = VM_PAGE_ON_SECLUDED_Q;
		vm_page_secluded_count++;
		vm_page_secluded_count_free++;
		queue_free = &vm_page_queue_secluded;
#endif /* CONFIG_SECLUDED_MEMORY */
	} else {
		mem->lopage = FALSE;
		mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;
		vm_page_free_count++;
		queue_free = &vm_page_queue_free[VM_PAGE_GET_COLOR(mem)].qhead;
	}
	if (mem->vm_page_q_state == VM_PAGE_ON_FREE_Q) {
#if defined(__x86_64__)
		vm_page_queue_enter_clump(queue_free, mem, vm_page_t, pageq);
#else
		vm_page_queue_enter(queue_free, mem, vm_page_t, pageq);
#endif
	} else
		vm_page_queue_enter_first(queue_free, mem, vm_page_t, pageq);
}

/*
 *	vm_page_wait:
 *
 *	Wait for a page to become available.
 *	If there are plenty of free pages, then we don't sleep.
 *
 *	Returns:
 *		TRUE:  There may be another page, try again
 *		FALSE: We were interrupted out of our wait, don't try again
 */
boolean_t
vm_page_wait(
	int	interruptible)
{
	/*
	 *	We can't use vm_page_free_reserved to make this
	 *	determination.  Consider: some thread might
	 *	need to allocate two pages.  The first allocation
	 *	succeeds, the second fails.  After the first page is freed,
	 *	a call to vm_page_wait must really block.
	 */
	kern_return_t	wait_result;
	int		need_wakeup = 0;
	int		is_privileged = current_thread()->options & TH_OPT_VMPRIV;

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (is_privileged && vm_page_free_count) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}

	if (vm_page_free_count >= vm_page_free_target) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}

	if (is_privileged) {
		if (vm_page_free_wanted_privileged++ == 0)
			need_wakeup = 1;
		wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
#if CONFIG_SECLUDED_MEMORY
	} else if (secluded_for_apps &&
		   task_can_use_secluded_mem(current_task())) {
		/* XXX FBDP: need pageq lock for this... */
		/* XXX FBDP: might wait even if pages available, */
		/* XXX FBDP: hopefully not for too long... */
		if (vm_page_secluded_count > 0) {
			lck_mtx_unlock(&vm_page_queue_free_lock);
			return TRUE;
		}
		if (vm_page_free_wanted_secluded++ == 0) {
			need_wakeup = 1;
		}
		wait_result = assert_wait(
			(event_t)&vm_page_free_wanted_secluded,
			interruptible);
#endif /* CONFIG_SECLUDED_MEMORY */
	} else {
		if (vm_page_free_wanted++ == 0)
			need_wakeup = 1;
		wait_result = assert_wait((event_t)&vm_page_free_count,
					  interruptible);
	}

	lck_mtx_unlock(&vm_page_queue_free_lock);
	counter(c_vm_page_wait_block++);

	if (need_wakeup)
		thread_wakeup((event_t)&vm_page_free_wanted);

	if (wait_result == THREAD_WAITING) {
		VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
			       vm_page_free_wanted_privileged,
			       vm_page_free_wanted,
#if CONFIG_SECLUDED_MEMORY
			       vm_page_free_wanted_secluded,
#else /* CONFIG_SECLUDED_MEMORY */
			       0,
#endif /* CONFIG_SECLUDED_MEMORY */
			       0);
		wait_result = thread_block(THREAD_CONTINUE_NULL);
		VM_DEBUG_EVENT(vm_page_wait_block,
			       VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
	}

	return (wait_result == THREAD_AWAKENED);
}
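/*
 * Illustrative usage sketch (not part of the original file): callers that
 * cannot make progress without a page typically loop on vm_page_grab() and
 * vm_page_wait(), retrying while vm_page_wait() returns TRUE and giving up if
 * the wait was interrupted.  The function name example_grab_page_blocking is
 * hypothetical; the sketch is kept under "#if 0" so it is never built.
 */
#if 0	/* sketch only, not built */
static vm_page_t
example_grab_page_blocking(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
			return VM_PAGE_NULL;	/* interrupted out of the wait */
	}
	return m;
}
#endif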
/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	mem;
	int		grab_options;

	vm_object_lock_assert_exclusive(object);
	grab_options = 0;
#if CONFIG_SECLUDED_MEMORY
	if (object->can_grab_secluded) {
		grab_options |= VM_PAGE_GRAB_SECLUDED;
	}
#endif /* CONFIG_SECLUDED_MEMORY */
	mem = vm_page_grab_options(grab_options);
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return(mem);
}
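/*
 * Illustrative usage sketch (not part of the original file): vm_page_alloc()
 * requires the object to be locked exclusively, and on success the returned
 * page is already inserted at the requested offset.  The helper name
 * example_populate_offset is hypothetical; the sketch is kept under "#if 0"
 * so it is never built.
 */
#if 0	/* sketch only, not built */
static kern_return_t
example_populate_offset(vm_object_t object, vm_object_offset_t offset)
{
	vm_page_t	m;

	vm_object_lock(object);		/* exclusive lock, as vm_page_alloc() asserts */
	m = vm_page_alloc(object, offset);
	vm_object_unlock(object);

	return (m == VM_PAGE_NULL) ? KERN_RESOURCE_SHORTAGE : KERN_SUCCESS;
}
#endif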
/*
 *	vm_page_alloc_guard:
 *
 *	Allocate a fictitious page which will be used
 *	as a guard page.  The page will be inserted into
 *	the object and returned to the caller.
 */
vm_page_t
vm_page_alloc_guard(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab_guard();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return(mem);
}


counter(unsigned int c_laundry_pages_freed = 0;)

/*
 *	vm_page_free_prepare:
 *
 *	Removes page from any queue it may be on
 *	and disassociates it from its VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
static void
vm_page_free_prepare(
	vm_page_t	mem)
{
	vm_page_free_prepare_queues(mem);
	vm_page_free_prepare_object(mem, TRUE);
}


void
vm_page_free_prepare_queues(
	vm_page_t	mem)
{
	vm_object_t	m_object;

	assert(mem->vm_page_q_state != VM_PAGE_ON_FREE_Q);
	assert(!mem->cleaning);
	m_object = VM_PAGE_OBJECT(mem);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (m_object) {
		vm_object_lock_assert_exclusive(m_object);
	}
	if (mem->laundry) {
		/*
		 * We may have to free a page while it's being laundered
		 * if we lost its pager (due to a forced unmount, for example).
		 * We need to call vm_pageout_steal_laundry() before removing
		 * the page from its VM object, so that we can remove it
		 * from its pageout queue and adjust the laundry accounting
		 */
		vm_pageout_steal_laundry(mem, TRUE);
		counter(++c_laundry_pages_freed);
	}

	vm_page_queues_remove(mem, TRUE);

	if (VM_PAGE_WIRED(mem)) {
		assert(mem->wire_count > 0);

		if (m_object) {
			VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
			VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
			VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);

			assert(m_object->resident_page_count >=
			       m_object->wired_page_count);

			if (m_object->purgable == VM_PURGABLE_VOLATILE) {
				OSAddAtomic(+1, &vm_page_purgeable_count);
				assert(vm_page_purgeable_wired_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_wired_count);
			}
			if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
			     m_object->purgable == VM_PURGABLE_EMPTY) &&
			    m_object->vo_purgeable_owner != TASK_NULL) {
				task_t	owner;

				owner = m_object->vo_purgeable_owner;
				/*
				 * While wired, this page was accounted
				 * as "non-volatile" but it should now
				 * be accounted as "volatile".
				 */
				/* one less "non-volatile"... */
				ledger_debit(owner->ledger,
					     task_ledgers.purgeable_nonvolatile,
					     PAGE_SIZE);
				/* ... and "phys_footprint" */
				ledger_debit(owner->ledger,
					     task_ledgers.phys_footprint,
					     PAGE_SIZE);
				/* one more "volatile" */
				ledger_credit(owner->ledger,
					      task_ledgers.purgeable_volatile,
					      PAGE_SIZE);
			}
		}
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;

		mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;
		mem->wire_count = 0;
		assert(!mem->gobbled);
	} else if (mem->gobbled) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
	}
}


void
vm_page_free_prepare_object(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	if (mem->tabled)
		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */

	PAGE_WAKEUP(mem);		/* clears wanted */

	if (mem->private) {
		mem->private = FALSE;
		mem->fictitious = TRUE;
		VM_PAGE_SET_PHYS_PAGE(mem, vm_page_fictitious_addr);
	}
	if ( !mem->fictitious) {
		assert(mem->pageq.next == 0);
		assert(mem->pageq.prev == 0);
		assert(mem->listq.next == 0);
		assert(mem->listq.prev == 0);
#if CONFIG_BACKGROUND_QUEUE
		assert(mem->vm_page_backgroundq.next == 0);
		assert(mem->vm_page_backgroundq.prev == 0);
#endif /* CONFIG_BACKGROUND_QUEUE */
		assert(mem->next_m == 0);
		vm_page_init(mem, VM_PAGE_GET_PHYS_PAGE(mem), mem->lopage);
	}
}


/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it with any VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
void
vm_page_free(
	vm_page_t	mem)
{
	vm_page_free_prepare(mem);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem,
				TRUE);	/* page queues are locked */
	}
}


void
vm_page_free_unlocked(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_lockspin_queues();
	vm_page_free_prepare_queues(mem);
	vm_page_unlock_queues();

	vm_page_free_prepare_object(mem, remove_from_hash);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem, FALSE); /* page queues are not locked */
	}
}
/*
 * Free a list of pages.  The list can be up to several hundred pages,
 * as blocked up by vm_pageout_scan().
 * The big win is not having to take the free list lock once
 * per page.
 *
 * The VM page queues lock (vm_page_queue_lock) should NOT be held.
 * The VM page free queues lock (vm_page_queue_free_lock) should NOT be held.
 */
void
vm_page_free_list(
	vm_page_t	freeq,
	boolean_t	prepare_object)
{
	vm_page_t	mem;
	vm_page_t	nxt;
	vm_page_t	local_freeq;
	int		pg_count;

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
	LCK_MTX_ASSERT(&vm_page_queue_free_lock, LCK_MTX_ASSERT_NOTOWNED);

	while (freeq) {

		pg_count = 0;
		local_freeq = VM_PAGE_NULL;
		mem = freeq;

		/*
		 * break up the processing into smaller chunks so
		 * that we can 'pipeline' the pages onto the
		 * free list w/o introducing too much
		 * contention on the global free queue lock
		 */
		while (mem && pg_count < 64) {

			assert((mem->vm_page_q_state == VM_PAGE_NOT_ON_Q) ||
			       (mem->vm_page_q_state == VM_PAGE_IS_WIRED));
#if CONFIG_BACKGROUND_QUEUE
			assert(mem->vm_page_backgroundq.next == 0 &&
			       mem->vm_page_backgroundq.prev == 0 &&
			       mem->vm_page_on_backgroundq == FALSE);
#endif
			nxt = mem->snext;
			mem->snext = NULL;
			assert(mem->pageq.prev == 0);

			if (vm_page_free_verify && !mem->fictitious && !mem->private) {
				assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(mem)));
			}
			if (prepare_object == TRUE)
				vm_page_free_prepare_object(mem, TRUE);

			if (!mem->fictitious) {

				if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
				    vm_lopage_free_count < vm_lopage_free_limit &&
				    VM_PAGE_GET_PHYS_PAGE(mem) < max_valid_low_ppnum) {
					vm_page_release(mem, FALSE); /* page queues are not locked */
#if CONFIG_SECLUDED_MEMORY
				} else if (vm_page_secluded_count < vm_page_secluded_target &&
					   num_tasks_can_use_secluded_mem == 0) {
					vm_page_release(mem,
							FALSE); /* page queues are not locked */
#endif /* CONFIG_SECLUDED_MEMORY */
				} else {
					/*
					 * IMPORTANT: we can't set the page "free" here
					 * because that would make the page eligible for
					 * a physically-contiguous allocation (see
					 * vm_page_find_contiguous()) right away (we don't
					 * hold the vm_page_queue_free lock).  That would
					 * cause trouble because the page is not actually
					 * in the free queue yet...
					 */
					mem->snext = local_freeq;
					local_freeq = mem;
					pg_count++;

					pmap_clear_noencrypt(VM_PAGE_GET_PHYS_PAGE(mem));
				}
			} else {
				assert(VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_fictitious_addr ||
				       VM_PAGE_GET_PHYS_PAGE(mem) == vm_page_guard_addr);
				vm_page_release_fictitious(mem);
			}
			mem = nxt;
		}
		freeq = mem;

		if ( (mem = local_freeq) ) {
			unsigned int	avail_free_count;
			unsigned int	need_wakeup = 0;
			unsigned int	need_priv_wakeup = 0;
#if CONFIG_SECLUDED_MEMORY
			unsigned int	need_wakeup_secluded = 0;
#endif /* CONFIG_SECLUDED_MEMORY */

			lck_mtx_lock_spin(&vm_page_queue_free_lock);

			while (mem) {
				int	color;

				nxt = mem->snext;

				assert(mem->vm_page_q_state == VM_PAGE_NOT_ON_Q);

				mem->lopage = FALSE;
				mem->vm_page_q_state = VM_PAGE_ON_FREE_Q;

				color = VM_PAGE_GET_COLOR(mem);
#if defined(__x86_64__)
				vm_page_queue_enter_clump(&vm_page_queue_free[color].qhead,
							  mem, vm_page_t, pageq);
#else
				vm_page_queue_enter(&vm_page_queue_free[color].qhead,
						    mem, vm_page_t, pageq);
#endif
				mem = nxt;
			}
			vm_page_free_count += pg_count;
			avail_free_count = vm_page_free_count;

			if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {

				if (avail_free_count < vm_page_free_wanted_privileged) {
					need_priv_wakeup = avail_free_count;
					vm_page_free_wanted_privileged -= avail_free_count;
					avail_free_count = 0;
				} else {
					need_priv_wakeup = vm_page_free_wanted_privileged;
					avail_free_count -= vm_page_free_wanted_privileged;
					vm_page_free_wanted_privileged = 0;
				}
			}
#if CONFIG_SECLUDED_MEMORY
			if (vm_page_free_wanted_secluded > 0 &&
			    avail_free_count > vm_page_free_reserved) {
				unsigned int	available_pages;
				available_pages = (avail_free_count -
						   vm_page_free_reserved);
				if (available_pages <
				    vm_page_free_wanted_secluded) {
					need_wakeup_secluded = available_pages;
					vm_page_free_wanted_secluded -=
						available_pages;
					avail_free_count -= available_pages;
				} else {
					need_wakeup_secluded =
						vm_page_free_wanted_secluded;
					avail_free_count -=
						vm_page_free_wanted_secluded;
					vm_page_free_wanted_secluded = 0;
				}
			}
#endif /* CONFIG_SECLUDED_MEMORY */
			if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
				unsigned int	available_pages;

				available_pages = avail_free_count - vm_page_free_reserved;

				if (available_pages >= vm_page_free_wanted) {
					need_wakeup = vm_page_free_wanted;
					vm_page_free_wanted = 0;
				} else {
					need_wakeup = available_pages;
					vm_page_free_wanted -= available_pages;
				}
			}
			lck_mtx_unlock(&vm_page_queue_free_lock);

			if (need_priv_wakeup != 0) {
				/*
				 * There shouldn't be that many VM-privileged threads,
				 * so let's wake them all up, even if we don't quite
				 * have enough pages to satisfy them all.
				 */
				thread_wakeup((event_t)&vm_page_free_wanted_privileged);
			}
#if CONFIG_SECLUDED_MEMORY
			if (need_wakeup_secluded != 0 &&
			    vm_page_free_wanted_secluded == 0) {
				thread_wakeup((event_t)
					      &vm_page_free_wanted_secluded);
			} else {
				for (;
				     need_wakeup_secluded != 0;
				     need_wakeup_secluded--) {
					thread_wakeup_one(
						(event_t)
						&vm_page_free_wanted_secluded);
				}
			}
#endif /* CONFIG_SECLUDED_MEMORY */
			if (need_wakeup != 0 && vm_page_free_wanted == 0) {
				/*
				 * We don't expect to have any more waiters
				 * after this, so let's wake them all up at
				 * once.
				 */
				thread_wakeup((event_t) &vm_page_free_count);
			} else for (; need_wakeup != 0; need_wakeup--) {
				/*
				 * Wake up one waiter per page we just released.
				 */
				thread_wakeup_one((event_t) &vm_page_free_count);
			}

			VM_CHECK_MEMORYSTATUS;
		}
	}
}
/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_wire(
	vm_page_t	mem,
	vm_tag_t	tag,
	boolean_t	check_memorystatus)
{
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(mem);

//	dbgLog(current_thread(), mem->offset, m_object, 1);	/* (TEST/DEBUG) */

	if (m_object) {
		vm_object_lock_assert_exclusive(m_object);
	} else {
		/*
		 * In theory, the page should be in an object before it
		 * gets wired, since we need to hold the object lock
		 * to update some fields in the page structure.
		 * However, some code (i386 pmap, for example) might want
		 * to wire a page before it gets inserted into an object.
		 * That's somewhat OK, as long as nobody else can get to
		 * that page and update it at the same time.
		 */
	}
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if ( !VM_PAGE_WIRED(mem)) {

		if (mem->laundry)
			vm_pageout_steal_laundry(mem, TRUE);

		vm_page_queues_remove(mem, TRUE);

		assert(mem->wire_count == 0);
		mem->vm_page_q_state = VM_PAGE_IS_WIRED;

		if (m_object) {
			VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
			VM_OBJECT_WIRED_PAGE_ADD(m_object, mem);
			VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, tag);

			assert(m_object->resident_page_count >=
			       m_object->wired_page_count);
			if (m_object->purgable == VM_PURGABLE_VOLATILE) {
				assert(vm_page_purgeable_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_count);
				OSAddAtomic(1, &vm_page_purgeable_wired_count);
			}
			if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
			     m_object->purgable == VM_PURGABLE_EMPTY) &&
			    m_object->vo_purgeable_owner != TASK_NULL) {
				task_t	owner;

				owner = m_object->vo_purgeable_owner;
				/* less volatile bytes */
				ledger_debit(owner->ledger,
					     task_ledgers.purgeable_volatile,
					     PAGE_SIZE);
				/* more not-quite-volatile bytes */
				ledger_credit(owner->ledger,
					      task_ledgers.purgeable_nonvolatile,
					      PAGE_SIZE);
				/* more footprint */
				ledger_credit(owner->ledger,
					      task_ledgers.phys_footprint,
					      PAGE_SIZE);
			}
			if (m_object->all_reusable) {
				/*
				 * Wired pages are not counted as "re-usable"
				 * in "all_reusable" VM objects, so nothing
				 * to do here.
				 */
			} else if (mem->reusable) {
				/*
				 * This page is not "re-usable" when it's
				 * wired, so adjust its state and the
				 * accounting.
				 */
				vm_object_reuse_pages(m_object,
						      mem->offset,
						      mem->offset+PAGE_SIZE_64,
						      FALSE);
			}
		}
		assert(!mem->reusable);

		if (!mem->private && !mem->fictitious && !mem->gobbled)
			vm_page_wire_count++;
		if (mem->gobbled)
			vm_page_gobble_count--;
		mem->gobbled = FALSE;

		if (check_memorystatus == TRUE) {
			VM_CHECK_MEMORYSTATUS;
		}
	}
	assert(!mem->gobbled);
	assert(mem->vm_page_q_state == VM_PAGE_IS_WIRED);
	mem->wire_count++;
	if (__improbable(mem->wire_count == 0)) {
		panic("vm_page_wire(%p): wire_count overflow", mem);
	}
}

/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_unwire(
	vm_page_t	mem,
	boolean_t	queueit)
{
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(mem);

//	dbgLog(current_thread(), mem->offset, m_object, 0);	/* (TEST/DEBUG) */

	assert(VM_PAGE_WIRED(mem));
	assert(mem->wire_count > 0);
	assert(!mem->gobbled);
	assert(m_object != VM_OBJECT_NULL);
	vm_object_lock_assert_exclusive(m_object);
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (--mem->wire_count == 0) {

		mem->vm_page_q_state = VM_PAGE_NOT_ON_Q;

		VM_OBJECT_WIRED_PAGE_UPDATE_START(m_object);
		VM_OBJECT_WIRED_PAGE_REMOVE(m_object, mem);
		VM_OBJECT_WIRED_PAGE_UPDATE_END(m_object, m_object->wire_tag);
		if (!mem->private && !mem->fictitious) {
			vm_page_wire_count--;
		}
		assert(m_object->resident_page_count >=
		       m_object->wired_page_count);
		if (m_object->purgable == VM_PURGABLE_VOLATILE) {
			OSAddAtomic(+1, &vm_page_purgeable_count);
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		}
		if ((m_object->purgable == VM_PURGABLE_VOLATILE ||
		     m_object->purgable == VM_PURGABLE_EMPTY) &&
		    m_object->vo_purgeable_owner != TASK_NULL) {
			task_t	owner;

			owner = m_object->vo_purgeable_owner;
			/* more volatile bytes */
			ledger_credit(owner->ledger,
				      task_ledgers.purgeable_volatile,
				      PAGE_SIZE);
			/* less not-quite-volatile bytes */
			ledger_debit(owner->ledger,
				     task_ledgers.purgeable_nonvolatile,
				     PAGE_SIZE);
			/* less footprint */
			ledger_debit(owner->ledger,
				     task_ledgers.phys_footprint,
				     PAGE_SIZE);
		}
		assert(m_object != kernel_object);
		assert(mem->pageq.next == 0 && mem->pageq.prev == 0);

		if (queueit == TRUE) {
			if (m_object->purgable == VM_PURGABLE_EMPTY) {
				vm_page_deactivate(mem);
			} else {
				vm_page_activate(mem);
			}
		}

		VM_CHECK_MEMORYSTATUS;
	}
}
/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(
	vm_page_t	m)
{
	vm_page_deactivate_internal(m, TRUE);
}


void
vm_page_deactivate_internal(
	vm_page_t	m,
	boolean_t	clear_hw_reference)
{
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(m);

	assert(m_object != kernel_object);
	assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);

//	dbgLog(VM_PAGE_GET_PHYS_PAGE(m), vm_page_free_count, vm_page_wire_count, 6);	/* (TEST/DEBUG) */
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 *	This page is no longer very interesting.  If it was
	 *	interesting (active or inactive/referenced), then we
	 *	clear the reference bit and (re)enter it in the
	 *	inactive queue.  Note wired pages should not have
	 *	their reference bit cleared.
	 */
	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {		/* can this happen? */
		assert( !VM_PAGE_WIRED(m));

		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * vm_page_queues_remove (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->private || m->fictitious ||
	    (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
	    (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
	    VM_PAGE_WIRED(m)) {
		return;
	}
	if (!m->absent && clear_hw_reference == TRUE)
		pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));

	m->reference = FALSE;
	m->no_cache = FALSE;

	if ( !VM_PAGE_INACTIVE(m)) {
		vm_page_queues_remove(m, FALSE);

		if (!VM_DYNAMIC_PAGING_ENABLED() &&
		    m->dirty && m_object->internal &&
		    (m_object->purgable == VM_PURGABLE_DENY ||
		     m_object->purgable == VM_PURGABLE_NONVOLATILE ||
		     m_object->purgable == VM_PURGABLE_VOLATILE)) {
			vm_page_check_pageable_safe(m);
			vm_page_queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
			m->vm_page_q_state = VM_PAGE_ON_THROTTLED_Q;
			vm_page_throttled_count++;
		} else {
			if (m_object->named && m_object->ref_count == 1) {
				vm_page_speculate(m, FALSE);
#if DEVELOPMENT || DEBUG
				vm_page_speculative_recreated++;
#endif
			} else {
				vm_page_enqueue_inactive(m, FALSE);
			}
		}
	}
}

/*
 * vm_page_enqueue_cleaned
 *
 * Put the page on the cleaned queue, mark it cleaned, etc.
 * Being on the cleaned queue (and having m->clean_queue set)
 * does ** NOT ** guarantee that the page is clean!
 *
 * Call with the queues lock held.
 */
void vm_page_enqueue_cleaned(vm_page_t m)
{
	vm_object_t	m_object;

	m_object = VM_PAGE_OBJECT(m);

	assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	assert( !(m->absent && !m->unusual));

	if (VM_PAGE_WIRED(m)) {
		return;
	}

	if (m->gobbled) {
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * vm_page_queues_remove (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->private || m->fictitious ||
	    (m->vm_page_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q) ||
	    (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)) {
		return;
	}
	vm_page_queues_remove(m, FALSE);

	vm_page_check_pageable_safe(m);
	vm_page_queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
	m->vm_page_q_state = VM_PAGE_ON_INACTIVE_CLEANED_Q;
	vm_page_cleaned_count++;

	vm_page_inactive_count++;
	if (m_object->internal) {
		vm_page_pageable_internal_count++;
	} else {
		vm_page_pageable_external_count++;
	}
#if CONFIG_BACKGROUND_QUEUE
	if (m->vm_page_in_background)
		vm_page_add_to_backgroundq(m, TRUE);
#endif
	vm_pageout_enqueued_cleaned++;
}
4166 * Put the specified page on the active list (if appropriate).
4168 * The page queues must be locked.
4175 vm_object_t m_object
;
4177 m_object
= VM_PAGE_OBJECT(m
);
4180 #ifdef FIXME_4778297
4181 assert(m_object
!= kernel_object
);
4183 assert(VM_PAGE_GET_PHYS_PAGE(m
) != vm_page_guard_addr
);
4184 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
4185 assert( !(m
->absent
&& !m
->unusual
));
4188 assert( !VM_PAGE_WIRED(m
));
4189 if (!m
->private && !m
->fictitious
)
4190 vm_page_wire_count
--;
4191 vm_page_gobble_count
--;
4195 * if this page is currently on the pageout queue, we can't do the
4196 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4197 * and we can't remove it manually since we would need the object lock
4198 * (which is not required here) to decrement the activity_in_progress
4199 * reference which is held on the object while the page is in the pageout queue...
4200 * just let the normal laundry processing proceed
4202 if (m
->laundry
|| m
->private || m
->fictitious
||
4203 (m
->vm_page_q_state
== VM_PAGE_USED_BY_COMPRESSOR
) ||
4204 (m
->vm_page_q_state
== VM_PAGE_ON_PAGEOUT_Q
))
4208 if (m
->vm_page_q_state
== VM_PAGE_ON_ACTIVE_Q
)
4209 panic("vm_page_activate: already active");
4212 if (m
->vm_page_q_state
== VM_PAGE_ON_SPECULATIVE_Q
) {
4213 DTRACE_VM2(pgrec
, int, 1, (uint64_t *), NULL
);
4214 DTRACE_VM2(pgfrec
, int, 1, (uint64_t *), NULL
);
4217 vm_page_queues_remove(m
, FALSE
);
4219 if ( !VM_PAGE_WIRED(m
)) {
4220 vm_page_check_pageable_safe(m
);
4221 if (!VM_DYNAMIC_PAGING_ENABLED() &&
4222 m
->dirty
&& m_object
->internal
&&
4223 (m_object
->purgable
== VM_PURGABLE_DENY
||
4224 m_object
->purgable
== VM_PURGABLE_NONVOLATILE
||
4225 m_object
->purgable
== VM_PURGABLE_VOLATILE
)) {
4226 vm_page_queue_enter(&vm_page_queue_throttled
, m
, vm_page_t
, pageq
);
4227 m
->vm_page_q_state
= VM_PAGE_ON_THROTTLED_Q
;
4228 vm_page_throttled_count
++;
4230 #if CONFIG_SECLUDED_MEMORY
4231 if (secluded_for_filecache
&&
4232 vm_page_secluded_target
!= 0 &&
4233 num_tasks_can_use_secluded_mem
== 0 &&
4234 m_object
->eligible_for_secluded
) {
4235 vm_page_queue_enter(&vm_page_queue_secluded
, m
,
4237 m
->vm_page_q_state
= VM_PAGE_ON_SECLUDED_Q
;
4238 vm_page_secluded_count
++;
4239 vm_page_secluded_count_inuse
++;
4240 assert(!m_object
->internal
);
4241 // vm_page_pageable_external_count++;
4243 #endif /* CONFIG_SECLUDED_MEMORY */
4244 vm_page_enqueue_active(m
, FALSE
);
4246 m
->reference
= TRUE
;
4247 m
->no_cache
= FALSE
;
4254 * vm_page_speculate:
4256 * Put the specified page on the speculative list (if appropriate).
4258 * The page queues must be locked.
4265 struct vm_speculative_age_q
*aq
;
4266 vm_object_t m_object
;
4268 m_object
= VM_PAGE_OBJECT(m
);
4271 vm_page_check_pageable_safe(m
);
4273 assert(VM_PAGE_GET_PHYS_PAGE(m
) != vm_page_guard_addr
);
4274 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
4275 assert( !(m
->absent
&& !m
->unusual
));
4276 assert(m_object
->internal
== FALSE
);
4279 * if this page is currently on the pageout queue, we can't do the
4280 * vm_page_queues_remove (which doesn't handle the pageout queue case)
4281 * and we can't remove it manually since we would need the object lock
4282 * (which is not required here) to decrement the activity_in_progress
4283 * reference which is held on the object while the page is in the pageout queue...
4284 * just let the normal laundry processing proceed
4286 if (m
->laundry
|| m
->private || m
->fictitious
||
4287 (m
->vm_page_q_state
== VM_PAGE_USED_BY_COMPRESSOR
) ||
4288 (m
->vm_page_q_state
== VM_PAGE_ON_PAGEOUT_Q
))
4291 vm_page_queues_remove(m
, FALSE
);
4293 if ( !VM_PAGE_WIRED(m
)) {
4298 clock_get_system_nanotime(&sec
, &nsec
);
4299 ts
.tv_sec
= (unsigned int) sec
;
4302 if (vm_page_speculative_count
== 0) {
4304 speculative_age_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
4305 speculative_steal_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
4307 aq
= &vm_page_queue_speculative
[speculative_age_index
];
4310 * set the timer to begin a new group
4312 aq
->age_ts
.tv_sec
= vm_page_speculative_q_age_ms
/ 1000;
4313 aq
->age_ts
.tv_nsec
= (vm_page_speculative_q_age_ms
% 1000) * 1000 * NSEC_PER_USEC
;
4315 ADD_MACH_TIMESPEC(&aq
->age_ts
, &ts
);
4317 aq
= &vm_page_queue_speculative
[speculative_age_index
];
4319 if (CMP_MACH_TIMESPEC(&ts
, &aq
->age_ts
) >= 0) {
4321 speculative_age_index
++;
4323 if (speculative_age_index
> VM_PAGE_MAX_SPECULATIVE_AGE_Q
)
4324 speculative_age_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
4325 if (speculative_age_index
== speculative_steal_index
) {
4326 speculative_steal_index
= speculative_age_index
+ 1;
4328 if (speculative_steal_index
> VM_PAGE_MAX_SPECULATIVE_AGE_Q
)
4329 speculative_steal_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
4331 aq
= &vm_page_queue_speculative
[speculative_age_index
];
4333 if (!vm_page_queue_empty(&aq
->age_q
))
4334 vm_page_speculate_ageit(aq
);
4336 aq
->age_ts
.tv_sec
= vm_page_speculative_q_age_ms
/ 1000;
4337 aq
->age_ts
.tv_nsec
= (vm_page_speculative_q_age_ms
% 1000) * 1000 * NSEC_PER_USEC
;
4339 ADD_MACH_TIMESPEC(&aq
->age_ts
, &ts
);
4342 vm_page_enqueue_tail(&aq
->age_q
, &m
->pageq
);
4343 m
->vm_page_q_state
= VM_PAGE_ON_SPECULATIVE_Q
;
4344 vm_page_speculative_count
++;
4345 vm_page_pageable_external_count
++;
4348 vm_object_lock_assert_exclusive(m_object
);
4350 m_object
->pages_created
++;
4351 #if DEVELOPMENT || DEBUG
4352 vm_page_speculative_created
++;
/*
 * move pages from the specified aging bin to
 * the speculative bin that pageout_scan claims from
 *
 *	The page queues must be locked.
 */
void
vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
{
	struct vm_speculative_age_q	*sq;
	vm_page_t	t;

	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

	if (vm_page_queue_empty(&sq->age_q)) {
		sq->age_q.next = aq->age_q.next;
		sq->age_q.prev = aq->age_q.prev;

		t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.next);
		t->pageq.prev = VM_PAGE_PACK_PTR(&sq->age_q);

		t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
		t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);
	} else {
		t = (vm_page_t)VM_PAGE_UNPACK_PTR(sq->age_q.prev);
		t->pageq.next = aq->age_q.next;

		t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.next);
		t->pageq.prev = sq->age_q.prev;

		t = (vm_page_t)VM_PAGE_UNPACK_PTR(aq->age_q.prev);
		t->pageq.next = VM_PAGE_PACK_PTR(&sq->age_q);

		sq->age_q.prev = aq->age_q.prev;
	}
	vm_page_queue_init(&aq->age_q);
}
void
vm_page_lru(
	vm_page_t	m)
{
	assert(VM_PAGE_OBJECT(m) != kernel_object);
	assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);

	LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * vm_page_queues_remove (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->private ||
	    (m->vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR) ||
	    (m->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q) ||
	    VM_PAGE_WIRED(m))
		return;

	m->no_cache = FALSE;

	vm_page_queues_remove(m, FALSE);

	vm_page_enqueue_inactive(m, FALSE);
}
void
vm_page_reactivate_all_throttled(void)
{
	vm_page_t	first_throttled, last_throttled;
	vm_page_t	first_active;
	vm_page_t	m;
	int		extra_active_count;
	int		extra_internal_count, extra_external_count;
	vm_object_t	m_object;

	if (!VM_DYNAMIC_PAGING_ENABLED())
		return;

	extra_active_count = 0;
	extra_internal_count = 0;
	extra_external_count = 0;
	vm_page_lock_queues();
	if (! vm_page_queue_empty(&vm_page_queue_throttled)) {
		/*
		 * Switch "throttled" pages to "active".
		 */
		vm_page_queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
			assert(m->vm_page_q_state == VM_PAGE_ON_THROTTLED_Q);

			m_object = VM_PAGE_OBJECT(m);

			extra_active_count++;
			if (m_object->internal) {
				extra_internal_count++;
			} else {
				extra_external_count++;
			}

			m->vm_page_q_state = VM_PAGE_ON_ACTIVE_Q;

#if CONFIG_BACKGROUND_QUEUE
			if (m->vm_page_in_background)
				vm_page_add_to_backgroundq(m, FALSE);
#endif
		}

		/*
		 * Transfer the entire throttled queue to the regular LRU page queues.
		 * We insert it at the head of the active queue, so that these pages
		 * get re-evaluated by the LRU algorithm first, since they've been
		 * completely out of it until now.
		 */
		first_throttled = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
		last_throttled = (vm_page_t) vm_page_queue_last(&vm_page_queue_throttled);
		first_active = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
		if (vm_page_queue_empty(&vm_page_queue_active)) {
			vm_page_queue_active.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
		} else {
			first_active->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_throttled);
		}
		vm_page_queue_active.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_throttled);
		first_throttled->pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active);
		last_throttled->pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active);

#if DEBUG
		printf("reactivated %d throttled pages\n", vm_page_throttled_count);
#endif
		vm_page_queue_init(&vm_page_queue_throttled);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += extra_active_count;
		vm_page_pageable_internal_count += extra_internal_count;
		vm_page_pageable_external_count += extra_external_count;
		vm_page_throttled_count = 0;
	}
	assert(vm_page_throttled_count == 0);
	assert(vm_page_queue_empty(&vm_page_queue_throttled));

	vm_page_unlock_queues();
}
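/*
 * Illustrative sketch only (not compiled): the head-splice performed above,
 * written with plain pointers instead of the packed VM_PAGE_PACK_PTR()
 * queue entries used by the kernel.  The struct and function names below are
 * hypothetical and exist only to show the pointer surgery on a circular,
 * sentinel-headed doubly linked list.
 */
#if 0
struct qnode {
	struct qnode	*next;
	struct qnode	*prev;
};

/* splice the non-empty circular list 'src' onto the head of 'dst' and empty 'src' */
static void
splice_at_head(struct qnode *dst, struct qnode *src)
{
	struct qnode	*first = src->next;	/* first element of src */
	struct qnode	*last  = src->prev;	/* last element of src  */
	struct qnode	*old_first = dst->next;	/* current head of dst  */

	/* link the spliced run in front of dst's current first element */
	dst->next = first;
	first->prev = dst;
	last->next = old_first;
	old_first->prev = last;	/* if dst was empty, old_first == dst, so dst->prev = last */

	/* leave src empty */
	src->next = src;
	src->prev = src;
}
#endif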
4509 * move pages from the indicated local queue to the global active queue
4510 * its ok to fail if we're below the hard limit and force == FALSE
4511 * the nolocks == TRUE case is to allow this function to be run on
4512 * the hibernate path
4516 vm_page_reactivate_local(uint32_t lid
, boolean_t force
, boolean_t nolocks
)
4519 vm_page_t first_local
, last_local
;
4520 vm_page_t first_active
;
4524 if (vm_page_local_q
== NULL
)
4527 lq
= &vm_page_local_q
[lid
].vpl_un
.vpl
;
4529 if (nolocks
== FALSE
) {
4530 if (lq
->vpl_count
< vm_page_local_q_hard_limit
&& force
== FALSE
) {
4531 if ( !vm_page_trylockspin_queues())
4534 vm_page_lockspin_queues();
4536 VPL_LOCK(&lq
->vpl_lock
);
4538 if (lq
->vpl_count
) {
4540 * Switch "local" pages to "active".
4542 assert(!vm_page_queue_empty(&lq
->vpl_queue
));
4544 vm_page_queue_iterate(&lq
->vpl_queue
, m
, vm_page_t
, pageq
) {
4546 vm_page_check_pageable_safe(m
);
4547 assert(m
->vm_page_q_state
== VM_PAGE_ON_ACTIVE_LOCAL_Q
);
4548 assert(!m
->fictitious
);
4550 if (m
->local_id
!= lid
)
4551 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m
);
4554 m
->vm_page_q_state
= VM_PAGE_ON_ACTIVE_Q
;
4556 #if CONFIG_BACKGROUND_QUEUE
4557 if (m
->vm_page_in_background
)
4558 vm_page_add_to_backgroundq(m
, FALSE
);
4562 if (count
!= lq
->vpl_count
)
4563 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count
, lq
->vpl_count
);
4566 * Transfer the entire local queue to a regular LRU page queues.
4568 first_local
= (vm_page_t
) vm_page_queue_first(&lq
->vpl_queue
);
4569 last_local
= (vm_page_t
) vm_page_queue_last(&lq
->vpl_queue
);
4570 first_active
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_active
);
4572 if (vm_page_queue_empty(&vm_page_queue_active
)) {
4573 vm_page_queue_active
.prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local
);
4575 first_active
->pageq
.prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local
);
4577 vm_page_queue_active
.next
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local
);
4578 first_local
->pageq
.prev
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(&vm_page_queue_active
);
4579 last_local
->pageq
.next
= VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_active
);
4581 vm_page_queue_init(&lq
->vpl_queue
);
4583 * Adjust the global page counts.
4585 vm_page_active_count
+= lq
->vpl_count
;
4586 vm_page_pageable_internal_count
+= lq
->vpl_internal_count
;
4587 vm_page_pageable_external_count
+= lq
->vpl_external_count
;
4589 lq
->vpl_internal_count
= 0;
4590 lq
->vpl_external_count
= 0;
4592 assert(vm_page_queue_empty(&lq
->vpl_queue
));
4594 if (nolocks
== FALSE
) {
4595 VPL_UNLOCK(&lq
->vpl_lock
);
4596 vm_page_unlock_queues();
4601 * vm_page_part_zero_fill:
4603 * Zero-fill a part of the page.
4605 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
4607 vm_page_part_zero_fill(
4615 * we don't hold the page queue lock
4616 * so this check isn't safe to make
4621 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
4622 pmap_zero_part_page(VM_PAGE_GET_PHYS_PAGE(m
), m_pa
, len
);
4626 tmp
= vm_page_grab();
4627 if (tmp
== VM_PAGE_NULL
) {
4628 vm_page_wait(THREAD_UNINT
);
4633 vm_page_zero_fill(tmp
);
4635 vm_page_part_copy(m
, 0, tmp
, 0, m_pa
);
4637 if((m_pa
+ len
) < PAGE_SIZE
) {
4638 vm_page_part_copy(m
, m_pa
+ len
, tmp
,
4639 m_pa
+ len
, PAGE_SIZE
- (m_pa
+ len
));
4641 vm_page_copy(tmp
,m
);
4648 * vm_page_zero_fill:
4650 * Zero-fill the specified page.
4657 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
4658 VM_PAGE_OBJECT(m
), m
->offset
, m
, 0,0);
4661 * we don't hold the page queue lock
4662 * so this check isn't safe to make
4667 // dbgTrace(0xAEAEAEAE, VM_PAGE_GET_PHYS_PAGE(m), 0); /* (BRINGUP) */
4668 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m
));
4672 * vm_page_part_copy:
4674 * copy part of one page to another
4687 * we don't hold the page queue lock
4688 * so this check isn't safe to make
4690 VM_PAGE_CHECK(src_m
);
4691 VM_PAGE_CHECK(dst_m
);
4693 pmap_copy_part_page(VM_PAGE_GET_PHYS_PAGE(src_m
), src_pa
,
4694 VM_PAGE_GET_PHYS_PAGE(dst_m
), dst_pa
, len
);
/*
 *	vm_page_copy:
 *
 *	Copy one page to another
 */

int vm_page_copy_cs_validations = 0;
int vm_page_copy_cs_tainted = 0;

void
vm_page_copy(
	vm_page_t	src_m,
	vm_page_t	dest_m)
{
	vm_object_t	src_m_object;

	src_m_object = VM_PAGE_OBJECT(src_m);

	XPR(XPR_VM_PAGE,
	    "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
	    src_m_object, src_m->offset,
	    VM_PAGE_OBJECT(dest_m), dest_m->offset,
	    0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dest_m);
#endif
	vm_object_lock_assert_held(src_m_object);

	if (src_m_object != VM_OBJECT_NULL &&
	    src_m_object->code_signed) {
		/*
		 * We're copying a page from a code-signed object.
		 * Whoever ends up mapping the copy page might care about
		 * the original page's integrity, so let's validate the
		 * source page now.
		 */
		vm_page_copy_cs_validations++;
		vm_page_validate_cs(src_m);
#if DEVELOPMENT || DEBUG
		DTRACE_VM4(codesigned_copy,
			   vm_object_t, src_m_object,
			   vm_object_offset_t, src_m->offset,
			   int, src_m->cs_validated,
			   int, src_m->cs_tainted);
#endif /* DEVELOPMENT || DEBUG */
	}

	if (vm_page_is_slideable(src_m)) {
		boolean_t was_busy = src_m->busy;
		src_m->busy = TRUE;
		(void) vm_page_slide(src_m, 0);
		assert(src_m->busy);
		if (!was_busy) {
			PAGE_WAKEUP_DONE(src_m);
		}
	}

	/*
	 * Propagate the cs_tainted bit to the copy page. Do not propagate
	 * the cs_validated bit.
	 */
	dest_m->cs_tainted = src_m->cs_tainted;
	if (dest_m->cs_tainted) {
		vm_page_copy_cs_tainted++;
	}
	dest_m->slid = src_m->slid;
	dest_m->error = src_m->error; /* sliding src_m might have failed... */
	pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(src_m), VM_PAGE_GET_PHYS_PAGE(dest_m));
}
#if MACH_ASSERT
static void
_vm_page_print(
	vm_page_t	p)
{
	printf("vm_page %p: \n", p);
	printf("  pageq: next=%p prev=%p\n",
	       (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.next),
	       (vm_page_t)VM_PAGE_UNPACK_PTR(p->pageq.prev));
	printf("  listq: next=%p prev=%p\n",
	       (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.next)),
	       (vm_page_t)(VM_PAGE_UNPACK_PTR(p->listq.prev)));
	printf("  next=%p\n", (vm_page_t)(VM_PAGE_UNPACK_PTR(p->next_m)));
	printf("  object=%p offset=0x%llx\n", VM_PAGE_OBJECT(p), p->offset);
	printf("  wire_count=%u\n", p->wire_count);
	printf("  q_state=%u\n", p->vm_page_q_state);

	printf("  %slaundry, %sref, %sgobbled, %sprivate\n",
	       (p->laundry ? "" : "!"),
	       (p->reference ? "" : "!"),
	       (p->gobbled ? "" : "!"),
	       (p->private ? "" : "!"));
	printf("  %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
	       (p->busy ? "" : "!"),
	       (p->wanted ? "" : "!"),
	       (p->tabled ? "" : "!"),
	       (p->fictitious ? "" : "!"),
	       (p->pmapped ? "" : "!"),
	       (p->wpmapped ? "" : "!"));
	printf("  %sfree_when_done, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
	       (p->free_when_done ? "" : "!"),
	       (p->absent ? "" : "!"),
	       (p->error ? "" : "!"),
	       (p->dirty ? "" : "!"),
	       (p->cleaning ? "" : "!"),
	       (p->precious ? "" : "!"),
	       (p->clustered ? "" : "!"));
	printf("  %soverwriting, %srestart, %sunusual\n",
	       (p->overwriting ? "" : "!"),
	       (p->restart ? "" : "!"),
	       (p->unusual ? "" : "!"));
	printf("  %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
	       (p->cs_validated ? "" : "!"),
	       (p->cs_tainted ? "" : "!"),
	       (p->cs_nx ? "" : "!"),
	       (p->no_cache ? "" : "!"));

	printf("phys_page=0x%x\n", VM_PAGE_GET_PHYS_PAGE(p));
}
/*
 *	Check that the list of pages is ordered by
 *	ascending physical address and has no holes.
 */
static boolean_t
vm_page_verify_contiguous(
	vm_page_t	pages,
	unsigned int	npages)
{
	vm_page_t	m;
	unsigned int	page_count;
	vm_offset_t	prev_addr;

	prev_addr = VM_PAGE_GET_PHYS_PAGE(pages);
	page_count = 1;
	for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
		if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
			printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
			       m, (long)prev_addr, VM_PAGE_GET_PHYS_PAGE(m));
			printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
			panic("vm_page_verify_contiguous:  not contiguous!");
		}
		prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
		++page_count;
	}
	if (page_count != npages) {
		printf("pages %p actual count 0x%x but requested 0x%x\n",
		       pages, page_count, npages);
		panic("vm_page_verify_contiguous:  count error");
	}
	return TRUE;
}
4858 * Check the free lists for proper length etc.
4860 static boolean_t vm_page_verify_this_free_list_enabled
= FALSE
;
4862 vm_page_verify_free_list(
4863 vm_page_queue_head_t
*vm_page_queue
,
4865 vm_page_t look_for_page
,
4866 boolean_t expect_page
)
4868 unsigned int npages
;
4871 boolean_t found_page
;
4873 if (! vm_page_verify_this_free_list_enabled
)
4878 prev_m
= (vm_page_t
)((uintptr_t)vm_page_queue
);
4880 vm_page_queue_iterate(vm_page_queue
,
4885 if (m
== look_for_page
) {
4888 if ((vm_page_t
)VM_PAGE_UNPACK_PTR(m
->pageq
.prev
) != prev_m
)
4889 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
4890 color
, npages
, m
, (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->pageq
.prev
), prev_m
);
4892 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
4894 if (color
!= (unsigned int) -1) {
4895 if (VM_PAGE_GET_COLOR(m
) != color
)
4896 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
4897 color
, npages
, m
, VM_PAGE_GET_COLOR(m
), color
);
4898 if (m
->vm_page_q_state
!= VM_PAGE_ON_FREE_Q
)
4899 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p - expecting q_state == VM_PAGE_ON_FREE_Q, found %d\n",
4900 color
, npages
, m
, m
->vm_page_q_state
);
4902 if (m
->vm_page_q_state
!= VM_PAGE_ON_FREE_LOCAL_Q
)
4903 panic("vm_page_verify_free_list(npages=%u): local page %p - expecting q_state == VM_PAGE_ON_FREE_LOCAL_Q, found %d\n",
4904 npages
, m
, m
->vm_page_q_state
);
4909 if (look_for_page
!= VM_PAGE_NULL
) {
4910 unsigned int other_color
;
4912 if (expect_page
&& !found_page
) {
4913 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
4914 color
, npages
, look_for_page
, VM_PAGE_GET_PHYS_PAGE(look_for_page
));
4915 _vm_page_print(look_for_page
);
4916 for (other_color
= 0;
4917 other_color
< vm_colors
;
4919 if (other_color
== color
)
4921 vm_page_verify_free_list(&vm_page_queue_free
[other_color
].qhead
,
4922 other_color
, look_for_page
, FALSE
);
4924 if (color
== (unsigned int) -1) {
4925 vm_page_verify_free_list(&vm_lopage_queue_free
,
4926 (unsigned int) -1, look_for_page
, FALSE
);
4928 panic("vm_page_verify_free_list(color=%u)\n", color
);
4930 if (!expect_page
&& found_page
) {
4931 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
4932 color
, npages
, look_for_page
, VM_PAGE_GET_PHYS_PAGE(look_for_page
));
static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
static void
vm_page_verify_free_lists( void )
{
	unsigned int	color, npages, nlopages;
	boolean_t	toggle = TRUE;

	if (! vm_page_verify_all_free_lists_enabled)
		return;

	npages = 0;

	lck_mtx_lock(&vm_page_queue_free_lock);

	if (vm_page_verify_this_free_list_enabled == TRUE) {
		/*
		 * This variable has been set globally for extra checking of
		 * each free list Q. Since we didn't set it, we don't own it
		 * and we shouldn't toggle it.
		 */
		toggle = FALSE;
	}

	if (toggle == TRUE) {
		vm_page_verify_this_free_list_enabled = TRUE;
	}

	for( color = 0; color < vm_colors; color++ ) {
		npages += vm_page_verify_free_list(&vm_page_queue_free[color].qhead,
						   color, VM_PAGE_NULL, FALSE);
	}
	nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
					    (unsigned int) -1,
					    VM_PAGE_NULL, FALSE);
	if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
		panic("vm_page_verify_free_lists:  "
		      "npages %u free_count %d nlopages %u lo_free_count %u",
		      npages, vm_page_free_count, nlopages, vm_lopage_free_count);

	if (toggle == TRUE) {
		vm_page_verify_this_free_list_enabled = FALSE;
	}

	lck_mtx_unlock(&vm_page_queue_free_lock);
}
#endif	/* MACH_ASSERT */
#if __arm64__
/*
 * 1 or more clients (currently only SEP) ask for a large contiguous chunk of memory
 * after the system has 'aged'. To ensure that other allocation requests don't mess
 * with the chances of that request being satisfied, we pre-allocate a single contiguous
 * 10MB buffer and hand it out to the first request of >= 4MB.
 */

kern_return_t cpm_preallocate_early(void);

vm_page_t cpm_preallocated_pages_list = NULL;
boolean_t preallocated_buffer_available = FALSE;

#define	PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT		((10 * 1024 * 1024) / PAGE_SIZE_64)	/* 10 MB */
#define	MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER	((4 * 1024 * 1024) / PAGE_SIZE_64)	/* 4 MB */

kern_return_t
cpm_preallocate_early(void)
{
	kern_return_t	kr = KERN_SUCCESS;
	vm_map_size_t	prealloc_size = (PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT * PAGE_SIZE_64);

	printf("cpm_preallocate_early called to preallocate contiguous buffer of %llu pages\n", PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT);

	kr = cpm_allocate(CAST_DOWN(vm_size_t, prealloc_size), &cpm_preallocated_pages_list, 0, 0, TRUE, 0);

	if (kr != KERN_SUCCESS) {
		printf("cpm_allocate for preallocated contig buffer failed with %d.\n", kr);
	} else {
		preallocated_buffer_available = TRUE;
	}

	return kr;
}
#endif /* __arm64__ */
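/*
 * For reference, the buffer and threshold sizes above translate into the
 * following page counts.  PAGE_SIZE_64 is configuration dependent, so these
 * are illustrative values only:
 *
 *	page size	10 MB buffer				>= 4 MB threshold
 *	4 KB		(10 * 1024 * 1024) / 4096  = 2560 pages	(4 * 1024 * 1024) / 4096  = 1024 pages
 *	16 KB		(10 * 1024 * 1024) / 16384 =  640 pages	(4 * 1024 * 1024) / 16384 =  256 pages
 */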
extern boolean_t (* volatile consider_buffer_cache_collect)(int);

/*
 *	CONTIGUOUS PAGE ALLOCATION
 *
 *	Find a region large enough to contain at least n pages
 *	of contiguous physical memory.
 *
 *	This is done by traversing the vm_page_t array in a linear fashion.
 *	We assume that the vm_page_t array has the available physical pages in an
 *	ordered, ascending list... this is currently true of all our implementations
 *	and must remain so... there can be 'holes' in the array... we also can
 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
 *	which used to happen via 'vm_page_convert'... that function was no longer
 *	being called and was removed...
 *
 *	The basic flow consists of stabilizing some of the interesting state of
 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 *	sweep at the beginning of the array looking for pages that meet our criteria
 *	for a 'stealable' page... currently we are pretty conservative... if the page
 *	meets these criteria and is physically contiguous to the previous page in the 'run',
 *	we keep developing it.  If we hit a page that doesn't fit, we reset our state
 *	and start to develop a new run... if at this point we've already considered
 *	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 *	and mutex_pause (which will yield the processor), to keep the latency low with
 *	respect to other threads trying to acquire free pages (or move pages from q to q),
 *	and then continue from the spot we left off... we only make 1 pass through the
 *	array.  Once we have a 'run' that is long enough, we'll go into the loop
 *	which steals the pages from the queues they're currently on... pages on the free
 *	queue can be stolen directly... pages that are on any of the other queues
 *	must be removed from the object they are tabled on... this requires taking the
 *	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
 *	dump the pages we've currently stolen back to the free list, and pick up our
 *	scan from the point where we aborted the 'current' run.
 *
 *	Requirements:
 *		- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 */

#define	MAX_CONSIDERED_BEFORE_YIELD	1000

#define RESET_STATE_OF_RUN()	\
	MACRO_BEGIN		\
	prevcontaddr = -2;	\
	start_pnum = -1;	\
	free_considered = 0;	\
	substitute_needed = 0;	\
	npages = 0;		\
	MACRO_END

/*
 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages ?
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1

static unsigned int vm_page_find_contiguous_last_idx = 0,  vm_page_lomem_find_contiguous_last_idx = 0;
#if DEBUG
int vm_page_find_contig_debug = 0;
#endif
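/*
 * Minimal sketch (not compiled) of the run-development idea described above:
 * walk an array of physical page numbers once, extend the current run while
 * each entry is exactly one frame past the previous one, and reset the run
 * whenever contiguity breaks.  All names and types here are illustrative only;
 * the real scan below also has to stabilize page state behind the
 * vm_page_queue / vm_page_free locks and yield periodically.
 */
#if 0
static int
find_contig_run(const unsigned int *pnums, unsigned int count, unsigned int want)
{
	unsigned int	run_len = 0;
	unsigned int	run_start = 0;
	unsigned int	prev = (unsigned int) -2;
	unsigned int	i;

	for (i = 0; i < count; i++) {
		if (run_len == 0 || pnums[i] != prev + 1) {
			/* run broken (or not started): start a new one here */
			run_start = i;
			run_len = 0;
		}
		prev = pnums[i];

		if (++run_len == want)
			return (int) run_start;	/* index of the first page of the run */
	}
	return -1;				/* no run of the requested length */
}
#endif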
5095 vm_page_find_contiguous(
5096 unsigned int contig_pages
,
5103 ppnum_t prevcontaddr
= 0;
5104 ppnum_t start_pnum
= 0;
5105 unsigned int npages
= 0, considered
= 0, scanned
= 0;
5106 unsigned int page_idx
= 0, start_idx
= 0, last_idx
= 0, orig_last_idx
= 0;
5107 unsigned int idx_last_contig_page_found
= 0;
5108 int free_considered
= 0, free_available
= 0;
5109 int substitute_needed
= 0;
5110 boolean_t wrapped
, zone_gc_called
= FALSE
;
5113 clock_sec_t tv_start_sec
= 0, tv_end_sec
= 0;
5114 clock_usec_t tv_start_usec
= 0, tv_end_usec
= 0;
5119 int stolen_pages
= 0;
5120 int compressed_pages
= 0;
5123 if (contig_pages
== 0)
5124 return VM_PAGE_NULL
;
5129 vm_page_verify_free_lists();
5132 clock_get_system_microtime(&tv_start_sec
, &tv_start_usec
);
5134 PAGE_REPLACEMENT_ALLOWED(TRUE
);
5136 vm_page_lock_queues();
5139 if (preallocated_buffer_available
) {
5141 if ((contig_pages
>= MIN_CONTIG_PAGES_REQUEST_FOR_PREALLOCATED_BUFFER
) && (contig_pages
<= PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT
)) {
5143 m
= cpm_preallocated_pages_list
;
5145 start_idx
= (unsigned int) (m
- &vm_pages
[0]);
5147 if (wire
== FALSE
) {
5149 last_idx
= start_idx
;
5151 for(npages
= 0; npages
< contig_pages
; npages
++, last_idx
++) {
5153 assert(vm_pages
[last_idx
].gobbled
== FALSE
);
5155 vm_pages
[last_idx
].gobbled
= TRUE
;
5156 vm_page_gobble_count
++;
5158 assert(1 == vm_pages
[last_idx
].wire_count
);
5160 * Gobbled pages are counted as wired pages. So no need to drop
5161 * the global wired page count. Just the page's wire count is fine.
5163 vm_pages
[last_idx
].wire_count
--;
5164 vm_pages
[last_idx
].vm_page_q_state
= VM_PAGE_NOT_ON_Q
;
5169 last_idx
= start_idx
+ contig_pages
- 1;
5171 vm_pages
[last_idx
].snext
= NULL
;
5173 printf("Using preallocated buffer: Requested size (pages):%d... index range: %d-%d...freeing %llu pages\n", contig_pages
, start_idx
, last_idx
, PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT
- contig_pages
);
5176 for(npages
= contig_pages
; npages
< PREALLOCATED_CONTIG_BUFFER_PAGES_COUNT
; npages
++, last_idx
++) {
5178 VM_PAGE_ZERO_PAGEQ_ENTRY(&vm_pages
[last_idx
]);
5179 vm_page_free(&vm_pages
[last_idx
]);
5182 cpm_preallocated_pages_list
= NULL
;
5183 preallocated_buffer_available
= FALSE
;
5188 #endif /* __arm64__ */
5190 lck_mtx_lock(&vm_page_queue_free_lock
);
5192 RESET_STATE_OF_RUN();
5196 free_available
= vm_page_free_count
- vm_page_free_reserved
;
5200 if(flags
& KMA_LOMEM
)
5201 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
;
5203 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
;
5205 orig_last_idx
= idx_last_contig_page_found
;
5206 last_idx
= orig_last_idx
;
5208 for (page_idx
= last_idx
, start_idx
= last_idx
;
5209 npages
< contig_pages
&& page_idx
< vm_pages_count
;
5214 page_idx
>= orig_last_idx
) {
5216 * We're back where we started and we haven't
5217 * found any suitable contiguous range. Let's
5223 m
= &vm_pages
[page_idx
];
5225 assert(!m
->fictitious
);
5226 assert(!m
->private);
5228 if (max_pnum
&& VM_PAGE_GET_PHYS_PAGE(m
) > max_pnum
) {
5229 /* no more low pages... */
5232 if (!npages
& ((VM_PAGE_GET_PHYS_PAGE(m
) & pnum_mask
) != 0)) {
5236 RESET_STATE_OF_RUN();
5238 } else if (VM_PAGE_WIRED(m
) || m
->gobbled
||
5239 m
->laundry
|| m
->wanted
||
5240 m
->cleaning
|| m
->overwriting
|| m
->free_when_done
) {
5242 * page is in a transient state
5243 * or a state we don't want to deal
5244 * with, so don't consider it which
5245 * means starting a new run
5247 RESET_STATE_OF_RUN();
5249 } else if ((m
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
) ||
5250 (m
->vm_page_q_state
== VM_PAGE_ON_FREE_LOCAL_Q
) ||
5251 (m
->vm_page_q_state
== VM_PAGE_ON_FREE_LOPAGE_Q
) ||
5252 (m
->vm_page_q_state
== VM_PAGE_ON_PAGEOUT_Q
)) {
5254 * page needs to be on one of our queues (other then the pageout or special free queues)
5255 * or it needs to belong to the compressor pool (which is now indicated
5256 * by vm_page_q_state == VM_PAGE_USED_BY_COMPRESSOR and falls out
5257 * from the check for VM_PAGE_NOT_ON_Q)
5258 * in order for it to be stable behind the
5259 * locks we hold at this point...
5260 * if not, don't consider it which
5261 * means starting a new run
5263 RESET_STATE_OF_RUN();
5265 } else if ((m
->vm_page_q_state
!= VM_PAGE_ON_FREE_Q
) && (!m
->tabled
|| m
->busy
)) {
5267 * pages on the free list are always 'busy'
5268 * so we couldn't test for 'busy' in the check
5269 * for the transient states... pages that are
5270 * 'free' are never 'tabled', so we also couldn't
5271 * test for 'tabled'. So we check here to make
5272 * sure that a non-free page is not busy and is
5273 * tabled on an object...
5274 * if not, don't consider it which
5275 * means starting a new run
5277 RESET_STATE_OF_RUN();
5280 if (VM_PAGE_GET_PHYS_PAGE(m
) != prevcontaddr
+ 1) {
5281 if ((VM_PAGE_GET_PHYS_PAGE(m
) & pnum_mask
) != 0) {
5282 RESET_STATE_OF_RUN();
5286 start_idx
= page_idx
;
5287 start_pnum
= VM_PAGE_GET_PHYS_PAGE(m
);
5292 prevcontaddr
= VM_PAGE_GET_PHYS_PAGE(m
);
5295 if (m
->vm_page_q_state
== VM_PAGE_ON_FREE_Q
) {
5299 * This page is not free.
5300 * If we can't steal used pages,
5301 * we have to give up this run
5303 * Otherwise, we might need to
5304 * move the contents of this page
5305 * into a substitute page.
5307 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5308 if (m
->pmapped
|| m
->dirty
|| m
->precious
) {
5309 substitute_needed
++;
5312 RESET_STATE_OF_RUN();
5316 if ((free_considered
+ substitute_needed
) > free_available
) {
5318 * if we let this run continue
5319 * we will end up dropping the vm_page_free_count
5320 * below the reserve limit... we need to abort
5321 * this run, but we can at least re-consider this
5322 * page... thus the jump back to 'retry'
5324 RESET_STATE_OF_RUN();
5326 if (free_available
&& considered
<= MAX_CONSIDERED_BEFORE_YIELD
) {
5331 * free_available == 0
5332 * so can't consider any free pages... if
5333 * we went to retry in this case, we'd
5334 * get stuck looking at the same page
5335 * w/o making any forward progress
5336 * we also want to take this path if we've already
5337 * reached our limit that controls the lock latency
5342 if (considered
> MAX_CONSIDERED_BEFORE_YIELD
&& npages
<= 1) {
5344 PAGE_REPLACEMENT_ALLOWED(FALSE
);
5346 lck_mtx_unlock(&vm_page_queue_free_lock
);
5347 vm_page_unlock_queues();
5351 PAGE_REPLACEMENT_ALLOWED(TRUE
);
5353 vm_page_lock_queues();
5354 lck_mtx_lock(&vm_page_queue_free_lock
);
5356 RESET_STATE_OF_RUN();
5358 * reset our free page limit since we
5359 * dropped the lock protecting the vm_page_free_queue
5361 free_available
= vm_page_free_count
- vm_page_free_reserved
;
5372 if (npages
!= contig_pages
) {
5375 * We didn't find a contiguous range but we didn't
5376 * start from the very first page.
5377 * Start again from the very first page.
5379 RESET_STATE_OF_RUN();
5380 if( flags
& KMA_LOMEM
)
5381 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
= 0;
5383 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
= 0;
5385 page_idx
= last_idx
;
5389 lck_mtx_unlock(&vm_page_queue_free_lock
);
5393 unsigned int cur_idx
;
5394 unsigned int tmp_start_idx
;
5395 vm_object_t locked_object
= VM_OBJECT_NULL
;
5396 boolean_t abort_run
= FALSE
;
5398 assert(page_idx
- start_idx
== contig_pages
);
5400 tmp_start_idx
= start_idx
;
5403 * first pass through to pull the free pages
5404 * off of the free queue so that in case we
5405 * need substitute pages, we won't grab any
5406 * of the free pages in the run... we'll clear
5407 * the 'free' bit in the 2nd pass, and even in
5408 * an abort_run case, we'll collect all of the
5409 * free pages in this run and return them to the free list
5411 while (start_idx
< page_idx
) {
5413 m1
= &vm_pages
[start_idx
++];
5415 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
5416 assert(m1
->vm_page_q_state
== VM_PAGE_ON_FREE_Q
);
5419 if (m1
->vm_page_q_state
== VM_PAGE_ON_FREE_Q
) {
5422 color
= VM_PAGE_GET_COLOR(m1
);
5424 vm_page_verify_free_list(&vm_page_queue_free
[color
].qhead
, color
, m1
, TRUE
);
5426 vm_page_queue_remove(&vm_page_queue_free
[color
].qhead
,
5431 VM_PAGE_ZERO_PAGEQ_ENTRY(m1
);
5433 vm_page_verify_free_list(&vm_page_queue_free
[color
].qhead
, color
, VM_PAGE_NULL
, FALSE
);
5436 * Clear the "free" bit so that this page
5437 * does not get considered for another
5438 * concurrent physically-contiguous allocation.
5440 m1
->vm_page_q_state
= VM_PAGE_NOT_ON_Q
;
5443 vm_page_free_count
--;
5446 if( flags
& KMA_LOMEM
)
5447 vm_page_lomem_find_contiguous_last_idx
= page_idx
;
5449 vm_page_find_contiguous_last_idx
= page_idx
;
5452 * we can drop the free queue lock at this point since
5453 * we've pulled any 'free' candidates off of the list
5454 * we need it dropped so that we can do a vm_page_grab
5455 * when substituing for pmapped/dirty pages
5457 lck_mtx_unlock(&vm_page_queue_free_lock
);
5459 start_idx
= tmp_start_idx
;
5460 cur_idx
= page_idx
- 1;
5462 while (start_idx
++ < page_idx
) {
5464 * must go through the list from back to front
5465 * so that the page list is created in the
5466 * correct order - low -> high phys addresses
5468 m1
= &vm_pages
[cur_idx
--];
5470 if (m1
->vm_page_object
== 0) {
5472 * page has already been removed from
5473 * the free list in the 1st pass
5475 assert(m1
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
);
5476 assert(m1
->offset
== (vm_object_offset_t
) -1);
5478 assert(!m1
->wanted
);
5479 assert(!m1
->laundry
);
5483 boolean_t disconnected
, reusable
;
5485 if (abort_run
== TRUE
)
5488 assert(m1
->vm_page_q_state
!= VM_PAGE_NOT_ON_Q
);
5490 object
= VM_PAGE_OBJECT(m1
);
5492 if (object
!= locked_object
) {
5493 if (locked_object
) {
5494 vm_object_unlock(locked_object
);
5495 locked_object
= VM_OBJECT_NULL
;
5497 if (vm_object_lock_try(object
))
5498 locked_object
= object
;
5500 if (locked_object
== VM_OBJECT_NULL
||
5501 (VM_PAGE_WIRED(m1
) || m1
->gobbled
||
5502 m1
->laundry
|| m1
->wanted
||
5503 m1
->cleaning
|| m1
->overwriting
|| m1
->free_when_done
|| m1
->busy
) ||
5504 (m1
->vm_page_q_state
== VM_PAGE_ON_PAGEOUT_Q
)) {
5506 if (locked_object
) {
5507 vm_object_unlock(locked_object
);
5508 locked_object
= VM_OBJECT_NULL
;
5510 tmp_start_idx
= cur_idx
;
5515 disconnected
= FALSE
;
5518 if ((m1
->reusable
||
5519 object
->all_reusable
) &&
5520 (m1
->vm_page_q_state
== VM_PAGE_ON_INACTIVE_INTERNAL_Q
) &&
5523 /* reusable page... */
5524 refmod
= pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1
));
5525 disconnected
= TRUE
;
5528 * ... not reused: can steal
5529 * without relocating contents.
5539 vm_object_offset_t offset
;
5541 m2
= vm_page_grab();
5543 if (m2
== VM_PAGE_NULL
) {
5544 if (locked_object
) {
5545 vm_object_unlock(locked_object
);
5546 locked_object
= VM_OBJECT_NULL
;
5548 tmp_start_idx
= cur_idx
;
5552 if (! disconnected
) {
5554 refmod
= pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m1
));
5559 /* copy the page's contents */
5560 pmap_copy_page(VM_PAGE_GET_PHYS_PAGE(m1
), VM_PAGE_GET_PHYS_PAGE(m2
));
5561 /* copy the page's state */
5562 assert(!VM_PAGE_WIRED(m1
));
5563 assert(m1
->vm_page_q_state
!= VM_PAGE_ON_FREE_Q
);
5564 assert(m1
->vm_page_q_state
!= VM_PAGE_ON_PAGEOUT_Q
);
5565 assert(!m1
->laundry
);
5566 m2
->reference
= m1
->reference
;
5567 assert(!m1
->gobbled
);
5568 assert(!m1
->private);
5569 m2
->no_cache
= m1
->no_cache
;
5572 assert(!m1
->wanted
);
5573 assert(!m1
->fictitious
);
5574 m2
->pmapped
= m1
->pmapped
; /* should flush cache ? */
5575 m2
->wpmapped
= m1
->wpmapped
;
5576 assert(!m1
->free_when_done
);
5577 m2
->absent
= m1
->absent
;
5578 m2
->error
= m1
->error
;
5579 m2
->dirty
= m1
->dirty
;
5580 assert(!m1
->cleaning
);
5581 m2
->precious
= m1
->precious
;
5582 m2
->clustered
= m1
->clustered
;
5583 assert(!m1
->overwriting
);
5584 m2
->restart
= m1
->restart
;
5585 m2
->unusual
= m1
->unusual
;
5586 m2
->cs_validated
= m1
->cs_validated
;
5587 m2
->cs_tainted
= m1
->cs_tainted
;
5588 m2
->cs_nx
= m1
->cs_nx
;
5591 * If m1 had really been reusable,
5592 * we would have just stolen it, so
5593 * let's not propagate it's "reusable"
5594 * bit and assert that m2 is not
5595 * marked as "reusable".
5597 // m2->reusable = m1->reusable;
5598 assert(!m2
->reusable
);
5600 // assert(!m1->lopage);
5601 m2
->slid
= m1
->slid
;
5603 if (m1
->vm_page_q_state
== VM_PAGE_USED_BY_COMPRESSOR
)
5604 m2
->vm_page_q_state
= VM_PAGE_USED_BY_COMPRESSOR
;
5607 * page may need to be flushed if
5608 * it is marshalled into a UPL
5609 * that is going to be used by a device
5610 * that doesn't support coherency
5612 m2
->written_by_kernel
= TRUE
;
5615 * make sure we clear the ref/mod state
5616 * from the pmap layer... else we risk
5617 * inheriting state from the last time
5618 * this page was used...
5620 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m2
), VM_MEM_MODIFIED
| VM_MEM_REFERENCED
);
5622 if (refmod
& VM_MEM_REFERENCED
)
5623 m2
->reference
= TRUE
;
5624 if (refmod
& VM_MEM_MODIFIED
) {
5625 SET_PAGE_DIRTY(m2
, TRUE
);
5627 offset
= m1
->offset
;
5630 * completely cleans up the state
5631 * of the page so that it is ready
5632 * to be put onto the free list, or
5633 * for this purpose it looks like it
5634 * just came off of the free list
5636 vm_page_free_prepare(m1
);
5639 * now put the substitute page
5642 vm_page_insert_internal(m2
, locked_object
, offset
, VM_KERN_MEMORY_NONE
, TRUE
, TRUE
, FALSE
, FALSE
, NULL
);
5644 if (m2
->vm_page_q_state
== VM_PAGE_USED_BY_COMPRESSOR
) {
5646 m2
->wpmapped
= TRUE
;
5648 PMAP_ENTER(kernel_pmap
, m2
->offset
, m2
,
5649 VM_PROT_READ
| VM_PROT_WRITE
, VM_PROT_NONE
, 0, TRUE
, kr
);
5651 assert(kr
== KERN_SUCCESS
);
5657 vm_page_activate(m2
);
5659 vm_page_deactivate(m2
);
5661 PAGE_WAKEUP_DONE(m2
);
5664 assert(m1
->vm_page_q_state
!= VM_PAGE_USED_BY_COMPRESSOR
);
5667 * completely cleans up the state
5668 * of the page so that it is ready
5669 * to be put onto the free list, or
5670 * for this purpose it looks like it
5671 * just came off of the free list
5673 vm_page_free_prepare(m1
);
5679 #if CONFIG_BACKGROUND_QUEUE
5680 vm_page_assign_background_state(m1
);
5682 VM_PAGE_ZERO_PAGEQ_ENTRY(m1
);
5686 if (locked_object
) {
5687 vm_object_unlock(locked_object
);
5688 locked_object
= VM_OBJECT_NULL
;
5691 if (abort_run
== TRUE
) {
5693 * want the index of the last
5694 * page in this run that was
5695 * successfully 'stolen', so back
5696 * it up 1 for the auto-decrement on use
5697 * and 1 more to bump back over this page
5699 page_idx
= tmp_start_idx
+ 2;
5700 if (page_idx
>= vm_pages_count
) {
5702 if (m
!= VM_PAGE_NULL
) {
5703 vm_page_unlock_queues();
5704 vm_page_free_list(m
, FALSE
);
5705 vm_page_lock_queues();
5711 page_idx
= last_idx
= 0;
5717 * We didn't find a contiguous range but we didn't
5718 * start from the very first page.
5719 * Start again from the very first page.
5721 RESET_STATE_OF_RUN();
5723 if( flags
& KMA_LOMEM
)
5724 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
= page_idx
;
5726 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
= page_idx
;
5728 last_idx
= page_idx
;
5730 if (m
!= VM_PAGE_NULL
) {
5731 vm_page_unlock_queues();
5732 vm_page_free_list(m
, FALSE
);
5733 vm_page_lock_queues();
5738 lck_mtx_lock(&vm_page_queue_free_lock
);
5740 * reset our free page limit since we
5741 * dropped the lock protecting the vm_page_free_queue
5743 free_available
= vm_page_free_count
- vm_page_free_reserved
;
5747 for (m1
= m
; m1
!= VM_PAGE_NULL
; m1
= NEXT_PAGE(m1
)) {
5749 assert(m1
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
);
5750 assert(m1
->wire_count
== 0);
5754 m1
->vm_page_q_state
= VM_PAGE_IS_WIRED
;
5759 vm_page_gobble_count
+= npages
;
5762 * gobbled pages are also counted as wired pages
5764 vm_page_wire_count
+= npages
;
5766 assert(vm_page_verify_contiguous(m
, npages
));
5769 PAGE_REPLACEMENT_ALLOWED(FALSE
);
5771 vm_page_unlock_queues();
5774 clock_get_system_microtime(&tv_end_sec
, &tv_end_usec
);
5776 tv_end_sec
-= tv_start_sec
;
5777 if (tv_end_usec
< tv_start_usec
) {
5779 tv_end_usec
+= 1000000;
5781 tv_end_usec
-= tv_start_usec
;
5782 if (tv_end_usec
>= 1000000) {
5784 tv_end_sec
-= 1000000;
5786 if (vm_page_find_contig_debug
) {
5787 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
5788 __func__
, contig_pages
, max_pnum
, npages
, (vm_object_offset_t
)start_pnum
<< PAGE_SHIFT
,
5789 (long)tv_end_sec
, tv_end_usec
, orig_last_idx
,
5790 scanned
, yielded
, dumped_run
, stolen_pages
, compressed_pages
);
5795 vm_page_verify_free_lists();
5797 if (m
== NULL
&& zone_gc_called
== FALSE
) {
5798 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
5799 __func__
, contig_pages
, max_pnum
, npages
, (vm_object_offset_t
)start_pnum
<< PAGE_SHIFT
,
5800 scanned
, yielded
, dumped_run
, stolen_pages
, compressed_pages
, vm_page_wire_count
);
5802 if (consider_buffer_cache_collect
!= NULL
) {
5803 (void)(*consider_buffer_cache_collect
)(1);
5806 consider_zone_gc(FALSE
);
5808 zone_gc_called
= TRUE
;
5810 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count
);
5811 goto full_scan_again
;
5818 * Allocate a list of contiguous, wired pages.
5830 unsigned int npages
;
5832 if (size
% PAGE_SIZE
!= 0)
5833 return KERN_INVALID_ARGUMENT
;
5835 npages
= (unsigned int) (size
/ PAGE_SIZE
);
5836 if (npages
!= size
/ PAGE_SIZE
) {
5837 /* 32-bit overflow */
5838 return KERN_INVALID_ARGUMENT
;
5842 * Obtain a pointer to a subset of the free
5843 * list large enough to satisfy the request;
5844 * the region will be physically contiguous.
5846 pages
= vm_page_find_contiguous(npages
, max_pnum
, pnum_mask
, wire
, flags
);
5848 if (pages
== VM_PAGE_NULL
)
5849 return KERN_NO_SPACE
;
5851 * determine need for wakeups
5853 if ((vm_page_free_count
< vm_page_free_min
) ||
5854 ((vm_page_free_count
< vm_page_free_target
) &&
5855 ((vm_page_inactive_count
+ vm_page_speculative_count
) < vm_page_inactive_min
)))
5856 thread_wakeup((event_t
) &vm_page_free_wanted
);
5858 VM_CHECK_MEMORYSTATUS
;
5861 * The CPM pages should now be available and
5862 * ordered by ascending physical address.
5864 assert(vm_page_verify_contiguous(pages
, npages
));
5867 return KERN_SUCCESS
;
unsigned int	vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;

/*
 * when working on a 'run' of pages, it is necessary to hold
 * the vm_page_queue_lock (a hot global lock) for certain operations
 * on the page... however, the majority of the work can be done
 * while merely holding the object lock... in fact there are certain
 * collections of pages that don't require any work brokered by the
 * vm_page_queue_lock... to mitigate the time spent behind the global
 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
 * while doing all of the work that doesn't require the vm_page_queue_lock...
 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
 * necessary work for each page... we will grab the busy bit on the page
 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
 * if it can't immediately take the vm_page_queue_lock in order to compete
 * for the locks in the same order that vm_pageout_scan takes them.
 * the operation names are modeled after the names of the routines that
 * need to be called in order to make the changes very obvious in the
 * original code
 */
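/*
 * Generic illustration (not compiled) of the 2 pass pattern described above:
 * batch up per-page work while holding only the object lock, then take the
 * hot global lock once per batch.  The structure and function names here are
 * hypothetical; the real code uses struct vm_page_delayed_work and
 * vm_page_do_delayed_work() below.
 */
#if 0
#define BATCH_LIMIT	64

struct batched_op {
	void		*page;
	unsigned int	op_mask;	/* which operations to apply later */
};

static void
process_pages(void **pages, unsigned int npages,
	      void (*apply_locked)(struct batched_op *, unsigned int))
{
	struct batched_op	batch[BATCH_LIMIT];
	unsigned int		n = 0;
	unsigned int		i;

	for (i = 0; i < npages; i++) {
		/* pass 1: decide what needs to happen; no global lock required */
		batch[n].page = pages[i];
		batch[n].op_mask = 0;		/* ... computed from page state ... */
		n++;

		if (n == BATCH_LIMIT) {
			/* pass 2: apply the whole batch under the global lock */
			apply_locked(batch, n);
			n = 0;
		}
	}
	if (n != 0)
		apply_locked(batch, n);
}
#endif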
5893 vm_page_do_delayed_work(
5896 struct vm_page_delayed_work
*dwp
,
5901 vm_page_t local_free_q
= VM_PAGE_NULL
;
5904 * pageout_scan takes the vm_page_lock_queues first
5905 * then tries for the object lock... to avoid what
5906 * is effectively a lock inversion, we'll go to the
5907 * trouble of taking them in that same order... otherwise
5908 * if this object contains the majority of the pages resident
5909 * in the UBC (or a small set of large objects actively being
5910 * worked on contain the majority of the pages), we could
5911 * cause the pageout_scan thread to 'starve' in its attempt
5912 * to find pages to move to the free queue, since it has to
5913 * successfully acquire the object lock of any candidate page
5914 * before it can steal/clean it.
5916 if (!vm_page_trylockspin_queues()) {
5917 vm_object_unlock(object
);
5919 vm_page_lockspin_queues();
5921 for (j
= 0; ; j
++) {
5922 if (!vm_object_lock_avoid(object
) &&
5923 _vm_object_lock_try(object
))
5925 vm_page_unlock_queues();
5927 vm_page_lockspin_queues();
5930 for (j
= 0; j
< dw_count
; j
++, dwp
++) {
5934 if (dwp
->dw_mask
& DW_vm_pageout_throttle_up
)
5935 vm_pageout_throttle_up(m
);
5936 #if CONFIG_PHANTOM_CACHE
5937 if (dwp
->dw_mask
& DW_vm_phantom_cache_update
)
5938 vm_phantom_cache_update(m
);
5940 if (dwp
->dw_mask
& DW_vm_page_wire
)
5941 vm_page_wire(m
, tag
, FALSE
);
5942 else if (dwp
->dw_mask
& DW_vm_page_unwire
) {
5945 queueit
= (dwp
->dw_mask
& (DW_vm_page_free
| DW_vm_page_deactivate_internal
)) ? FALSE
: TRUE
;
5947 vm_page_unwire(m
, queueit
);
5949 if (dwp
->dw_mask
& DW_vm_page_free
) {
5950 vm_page_free_prepare_queues(m
);
5952 assert(m
->pageq
.next
== 0 && m
->pageq
.prev
== 0);
5954 * Add this page to our list of reclaimed pages,
5955 * to be freed later.
5957 m
->snext
= local_free_q
;
5960 if (dwp
->dw_mask
& DW_vm_page_deactivate_internal
)
5961 vm_page_deactivate_internal(m
, FALSE
);
5962 else if (dwp
->dw_mask
& DW_vm_page_activate
) {
5963 if (m
->vm_page_q_state
!= VM_PAGE_ON_ACTIVE_Q
) {
5964 vm_page_activate(m
);
5967 else if (dwp
->dw_mask
& DW_vm_page_speculate
)
5968 vm_page_speculate(m
, TRUE
);
5969 else if (dwp
->dw_mask
& DW_enqueue_cleaned
) {
5971 * if we didn't hold the object lock and did this,
5972 * we might disconnect the page, then someone might
5973 * soft fault it back in, then we would put it on the
5974 * cleaned queue, and so we would have a referenced (maybe even dirty)
5975 * page on that queue, which we don't want
5977 int refmod_state
= pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
));
5979 if ((refmod_state
& VM_MEM_REFERENCED
)) {
5981 * this page has been touched since it got cleaned; let's activate it
5982 * if it hasn't already been
5984 vm_pageout_enqueued_cleaned
++;
5985 vm_pageout_cleaned_reactivated
++;
5986 vm_pageout_cleaned_commit_reactivated
++;
5988 if (m
->vm_page_q_state
!= VM_PAGE_ON_ACTIVE_Q
)
5989 vm_page_activate(m
);
5991 m
->reference
= FALSE
;
5992 vm_page_enqueue_cleaned(m
);
5995 else if (dwp
->dw_mask
& DW_vm_page_lru
)
5997 else if (dwp
->dw_mask
& DW_VM_PAGE_QUEUES_REMOVE
) {
5998 if (m
->vm_page_q_state
!= VM_PAGE_ON_PAGEOUT_Q
)
5999 vm_page_queues_remove(m
, TRUE
);
6001 if (dwp
->dw_mask
& DW_set_reference
)
6002 m
->reference
= TRUE
;
6003 else if (dwp
->dw_mask
& DW_clear_reference
)
6004 m
->reference
= FALSE
;
6006 if (dwp
->dw_mask
& DW_move_page
) {
6007 if (m
->vm_page_q_state
!= VM_PAGE_ON_PAGEOUT_Q
) {
6008 vm_page_queues_remove(m
, FALSE
);
6010 assert(VM_PAGE_OBJECT(m
) != kernel_object
);
6012 vm_page_enqueue_inactive(m
, FALSE
);
6015 if (dwp
->dw_mask
& DW_clear_busy
)
6018 if (dwp
->dw_mask
& DW_PAGE_WAKEUP
)
6022 vm_page_unlock_queues();
6025 vm_page_free_list(local_free_q
, TRUE
);
6027 VM_CHECK_MEMORYSTATUS
;
6037 vm_page_t lo_page_list
= VM_PAGE_NULL
;
6041 if ( !(flags
& KMA_LOMEM
))
6042 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
6044 for (i
= 0; i
< page_count
; i
++) {
6046 mem
= vm_page_grablo();
6048 if (mem
== VM_PAGE_NULL
) {
6050 vm_page_free_list(lo_page_list
, FALSE
);
6052 *list
= VM_PAGE_NULL
;
6054 return (KERN_RESOURCE_SHORTAGE
);
6056 mem
->snext
= lo_page_list
;
6059 *list
= lo_page_list
;
6061 return (KERN_SUCCESS
);
void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
	page->offset = offset;
}

vm_page_t
vm_page_get_next(vm_page_t page)
{
	return (page->snext);
}

vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
	return (page->offset);
}

ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
	return (VM_PAGE_GET_PHYS_PAGE(page));
}
6089 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6093 static vm_page_t hibernate_gobble_queue
;
6095 static int hibernate_drain_pageout_queue(struct vm_pageout_queue
*);
6096 static int hibernate_flush_dirty_pages(int);
6097 static int hibernate_flush_queue(vm_page_queue_head_t
*, int);
6099 void hibernate_flush_wait(void);
6100 void hibernate_mark_in_progress(void);
6101 void hibernate_clear_in_progress(void);
6103 void hibernate_free_range(int, int);
6104 void hibernate_hash_insert_page(vm_page_t
);
6105 uint32_t hibernate_mark_as_unneeded(addr64_t
, addr64_t
, hibernate_page_list_t
*, hibernate_page_list_t
*);
6106 void hibernate_rebuild_vm_structs(void);
6107 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t
*, hibernate_page_list_t
*);
6108 ppnum_t
hibernate_lookup_paddr(unsigned int);
6110 struct hibernate_statistics
{
6111 int hibernate_considered
;
6112 int hibernate_reentered_on_q
;
6113 int hibernate_found_dirty
;
6114 int hibernate_skipped_cleaning
;
6115 int hibernate_skipped_transient
;
6116 int hibernate_skipped_precious
;
6117 int hibernate_skipped_external
;
6118 int hibernate_queue_nolock
;
6119 int hibernate_queue_paused
;
6120 int hibernate_throttled
;
6121 int hibernate_throttle_timeout
;
6122 int hibernate_drained
;
6123 int hibernate_drain_timeout
;
6125 int cd_found_precious
;
6128 int cd_found_unusual
;
6129 int cd_found_cleaning
;
6130 int cd_found_laundry
;
6132 int cd_found_xpmapped
;
6133 int cd_skipped_xpmapped
;
6136 int cd_vm_page_wire_count
;
6137 int cd_vm_struct_pages_unneeded
;
6145 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
6146 * so that we don't overrun the estimated image size, which would
6147 * result in a hibernation failure.
6149 #define HIBERNATE_XPMAPPED_LIMIT 40000
6153 hibernate_drain_pageout_queue(struct vm_pageout_queue
*q
)
6155 wait_result_t wait_result
;
6157 vm_page_lock_queues();
6159 while ( !vm_page_queue_empty(&q
->pgo_pending
) ) {
6161 q
->pgo_draining
= TRUE
;
6163 assert_wait_timeout((event_t
) (&q
->pgo_laundry
+1), THREAD_INTERRUPTIBLE
, 5000, 1000*NSEC_PER_USEC
);
6165 vm_page_unlock_queues();
6167 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
6169 if (wait_result
== THREAD_TIMED_OUT
&& !vm_page_queue_empty(&q
->pgo_pending
)) {
6170 hibernate_stats
.hibernate_drain_timeout
++;
6172 if (q
== &vm_pageout_queue_external
)
6177 vm_page_lock_queues();
6179 hibernate_stats
.hibernate_drained
++;
6181 vm_page_unlock_queues();
6187 boolean_t hibernate_skip_external
= FALSE
;
6190 hibernate_flush_queue(vm_page_queue_head_t
*q
, int qcount
)
6193 vm_object_t l_object
= NULL
;
6194 vm_object_t m_object
= NULL
;
6195 int refmod_state
= 0;
6196 int try_failed_count
= 0;
6198 int current_run
= 0;
6199 struct vm_pageout_queue
*iq
;
6200 struct vm_pageout_queue
*eq
;
6201 struct vm_pageout_queue
*tq
;
6203 KDBG(IOKDBG_CODE(DBG_HIBERNATE
, 4) | DBG_FUNC_START
,
6204 VM_KERNEL_UNSLIDE_OR_PERM(q
), qcount
);
6206 iq
= &vm_pageout_queue_internal
;
6207 eq
= &vm_pageout_queue_external
;
6209 vm_page_lock_queues();
6211 while (qcount
&& !vm_page_queue_empty(q
)) {
6213 if (current_run
++ == 1000) {
6214 if (hibernate_should_abort()) {
6221 m
= (vm_page_t
) vm_page_queue_first(q
);
6222 m_object
= VM_PAGE_OBJECT(m
);
6225 * check to see if we currently are working
6226 * with the same object... if so, we've
6227 * already got the lock
6229 if (m_object
!= l_object
) {
6231 * the object associated with candidate page is
6232 * different from the one we were just working
6233 * with... dump the lock if we still own it
6235 if (l_object
!= NULL
) {
6236 vm_object_unlock(l_object
);
6240 * Try to lock object; since we've alread got the
6241 * page queues lock, we can only 'try' for this one.
6242 * if the 'try' fails, we need to do a mutex_pause
6243 * to allow the owner of the object lock a chance to
6246 if ( !vm_object_lock_try_scan(m_object
)) {
6248 if (try_failed_count
> 20) {
6249 hibernate_stats
.hibernate_queue_nolock
++;
6251 goto reenter_pg_on_q
;
6254 vm_page_unlock_queues();
6255 mutex_pause(try_failed_count
++);
6256 vm_page_lock_queues();
6258 hibernate_stats
.hibernate_queue_paused
++;
6261 l_object
= m_object
;
6264 if ( !m_object
->alive
|| m
->cleaning
|| m
->laundry
|| m
->busy
|| m
->absent
|| m
->error
) {
6266 * page is not to be cleaned
6267 * put it back on the head of its queue
6270 hibernate_stats
.hibernate_skipped_cleaning
++;
6272 hibernate_stats
.hibernate_skipped_transient
++;
6274 goto reenter_pg_on_q
;
6276 if (m_object
->copy
== VM_OBJECT_NULL
) {
6277 if (m_object
->purgable
== VM_PURGABLE_VOLATILE
|| m_object
->purgable
== VM_PURGABLE_EMPTY
) {
6279 * let the normal hibernate image path
6282 goto reenter_pg_on_q
;
6285 if ( !m
->dirty
&& m
->pmapped
) {
6286 refmod_state
= pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m
));
6288 if ((refmod_state
& VM_MEM_MODIFIED
)) {
6289 SET_PAGE_DIRTY(m
, FALSE
);
6296 * page is not to be cleaned
6297 * put it back on the head of its queue
6300 hibernate_stats
.hibernate_skipped_precious
++;
6302 goto reenter_pg_on_q
;
6305 if (hibernate_skip_external
== TRUE
&& !m_object
->internal
) {
6307 hibernate_stats
.hibernate_skipped_external
++;
6309 goto reenter_pg_on_q
;
6313 if (m_object
->internal
) {
6314 if (VM_PAGE_Q_THROTTLED(iq
))
6316 } else if (VM_PAGE_Q_THROTTLED(eq
))
6320 wait_result_t wait_result
;
6323 if (l_object
!= NULL
) {
6324 vm_object_unlock(l_object
);
6328 while (retval
== 0) {
6330 tq
->pgo_throttled
= TRUE
;
6332 assert_wait_timeout((event_t
) &tq
->pgo_laundry
, THREAD_INTERRUPTIBLE
, 1000, 1000*NSEC_PER_USEC
);
6334 vm_page_unlock_queues();
6336 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
6338 vm_page_lock_queues();
6340 if (wait_result
!= THREAD_TIMED_OUT
)
6342 if (!VM_PAGE_Q_THROTTLED(tq
))
6345 if (hibernate_should_abort())
6348 if (--wait_count
== 0) {
6350 hibernate_stats
.hibernate_throttle_timeout
++;
6353 hibernate_skip_external
= TRUE
;
6362 hibernate_stats
.hibernate_throttled
++;
6367 * we've already factored out pages in the laundry which
6368 * means this page can't be on the pageout queue so it's
6369 * safe to do the vm_page_queues_remove
6371 vm_page_queues_remove(m
, TRUE
);
6373 if (m_object
->internal
== TRUE
)
6374 pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m
), PMAP_OPTIONS_COMPRESSOR
, NULL
);
6376 vm_pageout_cluster(m
);
6378 hibernate_stats
.hibernate_found_dirty
++;
6383 vm_page_queue_remove(q
, m
, vm_page_t
, pageq
);
6384 vm_page_queue_enter(q
, m
, vm_page_t
, pageq
);
6386 hibernate_stats
.hibernate_reentered_on_q
++;
6388 hibernate_stats
.hibernate_considered
++;
6391 try_failed_count
= 0;
6393 if (l_object
!= NULL
) {
6394 vm_object_unlock(l_object
);
6398 vm_page_unlock_queues();
6400 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 4) | DBG_FUNC_END
, hibernate_stats
.hibernate_found_dirty
, retval
, 0, 0, 0);
6407 hibernate_flush_dirty_pages(int pass
)
6409 struct vm_speculative_age_q
*aq
;
6412 if (vm_page_local_q
) {
6413 for (i
= 0; i
< vm_page_local_q_count
; i
++)
6414 vm_page_reactivate_local(i
, TRUE
, FALSE
);
6417 for (i
= 0; i
<= VM_PAGE_MAX_SPECULATIVE_AGE_Q
; i
++) {
6421 aq
= &vm_page_queue_speculative
[i
];
6423 if (vm_page_queue_empty(&aq
->age_q
))
6427 vm_page_lockspin_queues();
6429 vm_page_queue_iterate(&aq
->age_q
,
6436 vm_page_unlock_queues();
6439 if (hibernate_flush_queue(&aq
->age_q
, qcount
))
6443 if (hibernate_flush_queue(&vm_page_queue_inactive
, vm_page_inactive_count
- vm_page_anonymous_count
- vm_page_cleaned_count
))
6445 /* XXX FBDP TODO: flush secluded queue */
6446 if (hibernate_flush_queue(&vm_page_queue_anonymous
, vm_page_anonymous_count
))
6448 if (hibernate_flush_queue(&vm_page_queue_cleaned
, vm_page_cleaned_count
))
6450 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal
))
6454 vm_compressor_record_warmup_start();
6456 if (hibernate_flush_queue(&vm_page_queue_active
, vm_page_active_count
)) {
6458 vm_compressor_record_warmup_end();
6461 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal
)) {
6463 vm_compressor_record_warmup_end();
6467 vm_compressor_record_warmup_end();
6469 if (hibernate_skip_external
== FALSE
&& hibernate_drain_pageout_queue(&vm_pageout_queue_external
))
6477 hibernate_reset_stats()
6479 bzero(&hibernate_stats
, sizeof(struct hibernate_statistics
));
6484 hibernate_flush_memory()
6488 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT
);
6490 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 3) | DBG_FUNC_START
, vm_page_free_count
, 0, 0, 0, 0);
6492 hibernate_cleaning_in_progress
= TRUE
;
6493 hibernate_skip_external
= FALSE
;
6495 if ((retval
= hibernate_flush_dirty_pages(1)) == 0) {
6497 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 10) | DBG_FUNC_START
, VM_PAGE_COMPRESSOR_COUNT
, 0, 0, 0, 0);
6499 vm_compressor_flush();
6501 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 10) | DBG_FUNC_END
, VM_PAGE_COMPRESSOR_COUNT
, 0, 0, 0, 0);
6503 if (consider_buffer_cache_collect
!= NULL
) {
6504 unsigned int orig_wire_count
;
6506 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 7) | DBG_FUNC_START
, 0, 0, 0, 0, 0);
6507 orig_wire_count
= vm_page_wire_count
;
6509 (void)(*consider_buffer_cache_collect
)(1);
6510 consider_zone_gc(FALSE
);
6512 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count
- vm_page_wire_count
);
6514 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 7) | DBG_FUNC_END
, orig_wire_count
- vm_page_wire_count
, 0, 0, 0, 0);
6517 hibernate_cleaning_in_progress
= FALSE
;
6519 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 3) | DBG_FUNC_END
, vm_page_free_count
, hibernate_stats
.hibernate_found_dirty
, retval
, 0, 0);
6522 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT
);
6525 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
6526 hibernate_stats
.hibernate_considered
,
6527 hibernate_stats
.hibernate_reentered_on_q
,
6528 hibernate_stats
.hibernate_found_dirty
);
6529 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
6530 hibernate_stats
.hibernate_skipped_cleaning
,
6531 hibernate_stats
.hibernate_skipped_transient
,
6532 hibernate_stats
.hibernate_skipped_precious
,
6533 hibernate_stats
.hibernate_skipped_external
,
6534 hibernate_stats
.hibernate_queue_nolock
);
6535 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
6536 hibernate_stats
.hibernate_queue_paused
,
6537 hibernate_stats
.hibernate_throttled
,
6538 hibernate_stats
.hibernate_throttle_timeout
,
6539 hibernate_stats
.hibernate_drained
,
6540 hibernate_stats
.hibernate_drain_timeout
);
6547 hibernate_page_list_zero(hibernate_page_list_t
*list
)
6550 hibernate_bitmap_t
* bitmap
;
6552 bitmap
= &list
->bank_bitmap
[0];
6553 for (bank
= 0; bank
< list
->bank_count
; bank
++)
6557 bzero((void *) &bitmap
->bitmap
[0], bitmap
->bitmapwords
<< 2);
6558 // set out-of-bound bits at end of bitmap.
6559 last_bit
= ((bitmap
->last_page
- bitmap
->first_page
+ 1) & 31);
6561 bitmap
->bitmap
[bitmap
->bitmapwords
- 1] = (0xFFFFFFFF >> last_bit
);
6563 bitmap
= (hibernate_bitmap_t
*) &bitmap
->bitmap
[bitmap
->bitmapwords
];
6568 hibernate_free_gobble_pages(void)
6573 m
= (vm_page_t
) hibernate_gobble_queue
;
6581 hibernate_gobble_queue
= VM_PAGE_NULL
;
6584 HIBLOG("Freed %d pages\n", count
);
6588 hibernate_consider_discard(vm_page_t m
, boolean_t preflight
)
6590 vm_object_t object
= NULL
;
6592 boolean_t discard
= FALSE
;
6597 panic("hibernate_consider_discard: private");
6599 object
= VM_PAGE_OBJECT(m
);
6601 if (!vm_object_lock_try(object
)) {
6603 if (!preflight
) hibernate_stats
.cd_lock_failed
++;
6606 if (VM_PAGE_WIRED(m
)) {
6607 if (!preflight
) hibernate_stats
.cd_found_wired
++;
6611 if (!preflight
) hibernate_stats
.cd_found_precious
++;
6614 if (m
->busy
|| !object
->alive
) {
6616 * Somebody is playing with this page.
6618 if (!preflight
) hibernate_stats
.cd_found_busy
++;
6621 if (m
->absent
|| m
->unusual
|| m
->error
) {
6623 * If it's unusual in anyway, ignore it
6625 if (!preflight
) hibernate_stats
.cd_found_unusual
++;
6629 if (!preflight
) hibernate_stats
.cd_found_cleaning
++;
6633 if (!preflight
) hibernate_stats
.cd_found_laundry
++;
6638 refmod_state
= pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m
));
6640 if (refmod_state
& VM_MEM_REFERENCED
)
6641 m
->reference
= TRUE
;
6642 if (refmod_state
& VM_MEM_MODIFIED
) {
6643 SET_PAGE_DIRTY(m
, FALSE
);
6648 * If it's clean or purgeable we can discard the page on wakeup.
6650 discard
= (!m
->dirty
)
6651 || (VM_PURGABLE_VOLATILE
== object
->purgable
)
6652 || (VM_PURGABLE_EMPTY
== object
->purgable
);
6655 if (discard
== FALSE
) {
6657 hibernate_stats
.cd_found_dirty
++;
6658 } else if (m
->xpmapped
&& m
->reference
&& !object
->internal
) {
6659 if (hibernate_stats
.cd_found_xpmapped
< HIBERNATE_XPMAPPED_LIMIT
) {
6661 hibernate_stats
.cd_found_xpmapped
++;
6665 hibernate_stats
.cd_skipped_xpmapped
++;
6672 vm_object_unlock(object
);
6679 hibernate_discard_page(vm_page_t m
)
6681 vm_object_t m_object
;
6683 if (m
->absent
|| m
->unusual
|| m
->error
)
6685 * If it's unusual in anyway, ignore
6689 m_object
= VM_PAGE_OBJECT(m
);
6691 #if MACH_ASSERT || DEBUG
6692 if (!vm_object_lock_try(m_object
))
6693 panic("hibernate_discard_page(%p) !vm_object_lock_try", m
);
6695 /* No need to lock page queue for token delete, hibernate_vm_unlock()
6696 makes sure these locks are uncontended before sleep */
6697 #endif /* MACH_ASSERT || DEBUG */
6699 if (m
->pmapped
== TRUE
)
6701 __unused
int refmod_state
= pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m
));
6705 panic("hibernate_discard_page(%p) laundry", m
);
6707 panic("hibernate_discard_page(%p) private", m
);
6709 panic("hibernate_discard_page(%p) fictitious", m
);
6711 if (VM_PURGABLE_VOLATILE
== m_object
->purgable
)
6713 /* object should be on a queue */
6714 assert((m_object
->objq
.next
!= NULL
) && (m_object
->objq
.prev
!= NULL
));
6715 purgeable_q_t old_queue
= vm_purgeable_object_remove(m_object
);
6717 if (m_object
->purgeable_when_ripe
) {
6718 vm_purgeable_token_delete_first(old_queue
);
6720 vm_object_lock_assert_exclusive(m_object
);
6721 m_object
->purgable
= VM_PURGABLE_EMPTY
;
6724 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
6725 * accounted in the "volatile" ledger, so no change here.
6726 * We have to update vm_page_purgeable_count, though, since we're
6727 * effectively purging this object.
6730 assert(m_object
->resident_page_count
>= m_object
->wired_page_count
);
6731 delta
= (m_object
->resident_page_count
- m_object
->wired_page_count
);
6732 assert(vm_page_purgeable_count
>= delta
);
6734 OSAddAtomic(-delta
, (SInt32
*)&vm_page_purgeable_count
);
6739 #if MACH_ASSERT || DEBUG
6740 vm_object_unlock(m_object
);
6741 #endif /* MACH_ASSERT || DEBUG */
6745 Grab locks for hibernate_page_list_setall()
6748 hibernate_vm_lock_queues(void)
6750 vm_object_lock(compressor_object
);
6751 vm_page_lock_queues();
6752 lck_mtx_lock(&vm_page_queue_free_lock
);
6753 lck_mtx_lock(&vm_purgeable_queue_lock
);
6755 if (vm_page_local_q
) {
6757 for (i
= 0; i
< vm_page_local_q_count
; i
++) {
6759 lq
= &vm_page_local_q
[i
].vpl_un
.vpl
;
6760 VPL_LOCK(&lq
->vpl_lock
);
6766 hibernate_vm_unlock_queues(void)
6768 if (vm_page_local_q
) {
6770 for (i
= 0; i
< vm_page_local_q_count
; i
++) {
6772 lq
= &vm_page_local_q
[i
].vpl_un
.vpl
;
6773 VPL_UNLOCK(&lq
->vpl_lock
);
6776 lck_mtx_unlock(&vm_purgeable_queue_lock
);
6777 lck_mtx_unlock(&vm_page_queue_free_lock
);
6778 vm_page_unlock_queues();
6779 vm_object_unlock(compressor_object
);
6783 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
6784 pages known to VM to not need saving are subtracted.
6785 Wired pages to be saved are present in page_list_wired, pageable in page_list.
6789 hibernate_page_list_setall(hibernate_page_list_t
* page_list
,
6790 hibernate_page_list_t
* page_list_wired
,
6791 hibernate_page_list_t
* page_list_pal
,
6792 boolean_t preflight
,
6793 boolean_t will_discard
,
6794 uint32_t * pagesOut
)
6796 uint64_t start
, end
, nsec
;
6799 uint32_t pages
= page_list
->page_count
;
6800 uint32_t count_anonymous
= 0, count_throttled
= 0, count_compressor
= 0;
6801 uint32_t count_inactive
= 0, count_active
= 0, count_speculative
= 0, count_cleaned
= 0;
6802 uint32_t count_wire
= pages
;
6803 uint32_t count_discard_active
= 0;
6804 uint32_t count_discard_inactive
= 0;
6805 uint32_t count_discard_cleaned
= 0;
6806 uint32_t count_discard_purgeable
= 0;
6807 uint32_t count_discard_speculative
= 0;
6808 uint32_t count_discard_vm_struct_pages
= 0;
6811 hibernate_bitmap_t
* bitmap
;
6812 hibernate_bitmap_t
* bitmap_wired
;
6813 boolean_t discard_all
;
6816 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight
);
6820 page_list_wired
= NULL
;
6821 page_list_pal
= NULL
;
6822 discard_all
= FALSE
;
6824 discard_all
= will_discard
;
6827 #if MACH_ASSERT || DEBUG
6830 assert(hibernate_vm_locks_are_safe());
6831 vm_page_lock_queues();
6832 if (vm_page_local_q
) {
6833 for (i
= 0; i
< vm_page_local_q_count
; i
++) {
6835 lq
= &vm_page_local_q
[i
].vpl_un
.vpl
;
6836 VPL_LOCK(&lq
->vpl_lock
);
6840 #endif /* MACH_ASSERT || DEBUG */
6843 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 8) | DBG_FUNC_START
, count_wire
, 0, 0, 0, 0);
6845 clock_get_uptime(&start
);
6848 hibernate_page_list_zero(page_list
);
6849 hibernate_page_list_zero(page_list_wired
);
6850 hibernate_page_list_zero(page_list_pal
);
6852 hibernate_stats
.cd_vm_page_wire_count
= vm_page_wire_count
;
6853 hibernate_stats
.cd_pages
= pages
;
6856 if (vm_page_local_q
) {
6857 for (i
= 0; i
< vm_page_local_q_count
; i
++)
6858 vm_page_reactivate_local(i
, TRUE
, !preflight
);
6862 vm_object_lock(compressor_object
);
6863 vm_page_lock_queues();
6864 lck_mtx_lock(&vm_page_queue_free_lock
);
6867 m
= (vm_page_t
) hibernate_gobble_queue
;
6873 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6874 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6879 if (!preflight
) for( i
= 0; i
< real_ncpus
; i
++ )
6881 if (cpu_data_ptr
[i
] && cpu_data_ptr
[i
]->cpu_processor
)
6883 for (m
= PROCESSOR_DATA(cpu_data_ptr
[i
]->cpu_processor
, free_pages
); m
; m
= m
->snext
)
6885 assert(m
->vm_page_q_state
== VM_PAGE_ON_FREE_LOCAL_Q
);
6889 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6890 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6892 hibernate_stats
.cd_local_free
++;
6893 hibernate_stats
.cd_total_free
++;
6898 for( i
= 0; i
< vm_colors
; i
++ )
6900 vm_page_queue_iterate(&vm_page_queue_free
[i
].qhead
,
6905 assert(m
->vm_page_q_state
== VM_PAGE_ON_FREE_Q
);
6910 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6911 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6913 hibernate_stats
.cd_total_free
++;
6918 vm_page_queue_iterate(&vm_lopage_queue_free
,
6923 assert(m
->vm_page_q_state
== VM_PAGE_ON_FREE_LOPAGE_Q
);
6928 hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6929 hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6931 hibernate_stats
.cd_total_free
++;
6935 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_throttled
);
6936 while (m
&& !vm_page_queue_end(&vm_page_queue_throttled
, (vm_page_queue_entry_t
)m
))
6938 assert(m
->vm_page_q_state
== VM_PAGE_ON_THROTTLED_Q
);
6940 next
= (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->pageq
.next
);
6942 if ((kIOHibernateModeDiscardCleanInactive
& gIOHibernateMode
)
6943 && hibernate_consider_discard(m
, preflight
))
6945 if (!preflight
) hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6946 count_discard_inactive
++;
6947 discard
= discard_all
;
6952 if (!preflight
) hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6954 if (discard
) hibernate_discard_page(m
);
6958 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_anonymous
);
6959 while (m
&& !vm_page_queue_end(&vm_page_queue_anonymous
, (vm_page_queue_entry_t
)m
))
6961 assert(m
->vm_page_q_state
== VM_PAGE_ON_INACTIVE_INTERNAL_Q
);
6963 next
= (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->pageq
.next
);
6965 if ((kIOHibernateModeDiscardCleanInactive
& gIOHibernateMode
)
6966 && hibernate_consider_discard(m
, preflight
))
6968 if (!preflight
) hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6970 count_discard_purgeable
++;
6972 count_discard_inactive
++;
6973 discard
= discard_all
;
6978 if (!preflight
) hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6979 if (discard
) hibernate_discard_page(m
);
6983 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_cleaned
);
6984 while (m
&& !vm_page_queue_end(&vm_page_queue_cleaned
, (vm_page_queue_entry_t
)m
))
6986 assert(m
->vm_page_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
);
6988 next
= (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->pageq
.next
);
6990 if ((kIOHibernateModeDiscardCleanInactive
& gIOHibernateMode
)
6991 && hibernate_consider_discard(m
, preflight
))
6993 if (!preflight
) hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
6995 count_discard_purgeable
++;
6997 count_discard_cleaned
++;
6998 discard
= discard_all
;
7003 if (!preflight
) hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7004 if (discard
) hibernate_discard_page(m
);
7008 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_active
);
7009 while (m
&& !vm_page_queue_end(&vm_page_queue_active
, (vm_page_queue_entry_t
)m
))
7011 assert(m
->vm_page_q_state
== VM_PAGE_ON_ACTIVE_Q
);
7013 next
= (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->pageq
.next
);
7015 if ((kIOHibernateModeDiscardCleanActive
& gIOHibernateMode
)
7016 && hibernate_consider_discard(m
, preflight
))
7018 if (!preflight
) hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7020 count_discard_purgeable
++;
7022 count_discard_active
++;
7023 discard
= discard_all
;
7028 if (!preflight
) hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7029 if (discard
) hibernate_discard_page(m
);
7033 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_inactive
);
7034 while (m
&& !vm_page_queue_end(&vm_page_queue_inactive
, (vm_page_queue_entry_t
)m
))
7036 assert(m
->vm_page_q_state
== VM_PAGE_ON_INACTIVE_EXTERNAL_Q
);
7038 next
= (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->pageq
.next
);
7040 if ((kIOHibernateModeDiscardCleanInactive
& gIOHibernateMode
)
7041 && hibernate_consider_discard(m
, preflight
))
7043 if (!preflight
) hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7045 count_discard_purgeable
++;
7047 count_discard_inactive
++;
7048 discard
= discard_all
;
7053 if (!preflight
) hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7054 if (discard
) hibernate_discard_page(m
);
7057 /* XXX FBDP TODO: secluded queue */
7059 for( i
= 0; i
<= VM_PAGE_MAX_SPECULATIVE_AGE_Q
; i
++ )
7061 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_speculative
[i
].age_q
);
7062 while (m
&& !vm_page_queue_end(&vm_page_queue_speculative
[i
].age_q
, (vm_page_queue_entry_t
)m
))
7064 assert(m
->vm_page_q_state
== VM_PAGE_ON_SPECULATIVE_Q
);
7066 next
= (vm_page_t
)VM_PAGE_UNPACK_PTR(m
->pageq
.next
);
7068 if ((kIOHibernateModeDiscardCleanInactive
& gIOHibernateMode
)
7069 && hibernate_consider_discard(m
, preflight
))
7071 if (!preflight
) hibernate_page_bitset(page_list
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7072 count_discard_speculative
++;
7073 discard
= discard_all
;
7076 count_speculative
++;
7078 if (!preflight
) hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7079 if (discard
) hibernate_discard_page(m
);
7084 vm_page_queue_iterate(&compressor_object
->memq
, m
, vm_page_t
, listq
)
7086 assert(m
->vm_page_q_state
== VM_PAGE_USED_BY_COMPRESSOR
);
7090 if (!preflight
) hibernate_page_bitset(page_list_wired
, TRUE
, VM_PAGE_GET_PHYS_PAGE(m
));
7093 if (preflight
== FALSE
&& discard_all
== TRUE
) {
7094 KDBG(IOKDBG_CODE(DBG_HIBERNATE
, 12) | DBG_FUNC_START
);
7096 HIBLOG("hibernate_teardown started\n");
7097 count_discard_vm_struct_pages
= hibernate_teardown_vm_structs(page_list
, page_list_wired
);
7098 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages
);
7100 pages
-= count_discard_vm_struct_pages
;
7101 count_wire
-= count_discard_vm_struct_pages
;
7103 hibernate_stats
.cd_vm_struct_pages_unneeded
= count_discard_vm_struct_pages
;
7105 KDBG(IOKDBG_CODE(DBG_HIBERNATE
, 12) | DBG_FUNC_END
);
7109 // pull wired from hibernate_bitmap
7110 bitmap
= &page_list
->bank_bitmap
[0];
7111 bitmap_wired
= &page_list_wired
->bank_bitmap
[0];
7112 for (bank
= 0; bank
< page_list
->bank_count
; bank
++)
7114 for (i
= 0; i
< bitmap
->bitmapwords
; i
++)
7115 bitmap
->bitmap
[i
] = bitmap
->bitmap
[i
] | ~bitmap_wired
->bitmap
[i
];
7116 bitmap
= (hibernate_bitmap_t
*) &bitmap
->bitmap
[bitmap
->bitmapwords
];
7117 bitmap_wired
= (hibernate_bitmap_t
*) &bitmap_wired
->bitmap
[bitmap_wired
->bitmapwords
];
7121 // machine dependent adjustments
7122 hibernate_page_list_setall_machine(page_list
, page_list_wired
, preflight
, &pages
);
7125 hibernate_stats
.cd_count_wire
= count_wire
;
7126 hibernate_stats
.cd_discarded
= count_discard_active
+ count_discard_inactive
+ count_discard_purgeable
+
7127 count_discard_speculative
+ count_discard_cleaned
+ count_discard_vm_struct_pages
;
7130 clock_get_uptime(&end
);
7131 absolutetime_to_nanoseconds(end
- start
, &nsec
);
7132 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec
/ 1000000ULL);
7134 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
7135 pages
, count_wire
, count_active
, count_inactive
, count_cleaned
, count_speculative
, count_anonymous
, count_throttled
, count_compressor
, hibernate_stats
.cd_found_xpmapped
,
7136 discard_all
? "did" : "could",
7137 count_discard_active
, count_discard_inactive
, count_discard_purgeable
, count_discard_speculative
, count_discard_cleaned
);
7139 if (hibernate_stats
.cd_skipped_xpmapped
)
7140 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats
.cd_skipped_xpmapped
);
7142 *pagesOut
= pages
- count_discard_active
- count_discard_inactive
- count_discard_purgeable
- count_discard_speculative
- count_discard_cleaned
;
7144 if (preflight
&& will_discard
) *pagesOut
-= count_compressor
+ count_throttled
+ count_anonymous
+ count_inactive
+ count_cleaned
+ count_speculative
+ count_active
;
7146 #if MACH_ASSERT || DEBUG
7149 if (vm_page_local_q
) {
7150 for (i
= 0; i
< vm_page_local_q_count
; i
++) {
7152 lq
= &vm_page_local_q
[i
].vpl_un
.vpl
;
7153 VPL_UNLOCK(&lq
->vpl_lock
);
7156 vm_page_unlock_queues();
7158 #endif /* MACH_ASSERT || DEBUG */
7161 lck_mtx_unlock(&vm_page_queue_free_lock
);
7162 vm_page_unlock_queues();
7163 vm_object_unlock(compressor_object
);
7166 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 8) | DBG_FUNC_END
, count_wire
, *pagesOut
, 0, 0, 0);
7170 hibernate_page_list_discard(hibernate_page_list_t
* page_list
)
7172 uint64_t start
, end
, nsec
;
7176 uint32_t count_discard_active
= 0;
7177 uint32_t count_discard_inactive
= 0;
7178 uint32_t count_discard_purgeable
= 0;
7179 uint32_t count_discard_cleaned
= 0;
7180 uint32_t count_discard_speculative
= 0;
7183 #if MACH_ASSERT || DEBUG
7184 vm_page_lock_queues();
7185 if (vm_page_local_q
) {
7186 for (i
= 0; i
< vm_page_local_q_count
; i
++) {
7188 lq
= &vm_page_local_q
[i
].vpl_un
.vpl
;
7189 VPL_LOCK(&lq
->vpl_lock
);
7192 #endif /* MACH_ASSERT || DEBUG */
7194 clock_get_uptime(&start
);
7196 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_anonymous
);
7197 while (m
&& !vm_page_queue_end(&vm_page_queue_anonymous
, (vm_page_queue_entry_t
)m
))
7199 assert(m
->vm_page_q_state
== VM_PAGE_ON_INACTIVE_INTERNAL_Q
);
7201 next
= (vm_page_t
) VM_PAGE_UNPACK_PTR(m
->pageq
.next
);
7202 if (hibernate_page_bittst(page_list
, VM_PAGE_GET_PHYS_PAGE(m
)))
7205 count_discard_purgeable
++;
7207 count_discard_inactive
++;
7208 hibernate_discard_page(m
);
7213 for( i
= 0; i
<= VM_PAGE_MAX_SPECULATIVE_AGE_Q
; i
++ )
7215 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_speculative
[i
].age_q
);
7216 while (m
&& !vm_page_queue_end(&vm_page_queue_speculative
[i
].age_q
, (vm_page_queue_entry_t
)m
))
7218 assert(m
->vm_page_q_state
== VM_PAGE_ON_SPECULATIVE_Q
);
7220 next
= (vm_page_t
) VM_PAGE_UNPACK_PTR(m
->pageq
.next
);
7221 if (hibernate_page_bittst(page_list
, VM_PAGE_GET_PHYS_PAGE(m
)))
7223 count_discard_speculative
++;
7224 hibernate_discard_page(m
);
7230 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_inactive
);
7231 while (m
&& !vm_page_queue_end(&vm_page_queue_inactive
, (vm_page_queue_entry_t
)m
))
7233 assert(m
->vm_page_q_state
== VM_PAGE_ON_INACTIVE_EXTERNAL_Q
);
7235 next
= (vm_page_t
) VM_PAGE_UNPACK_PTR(m
->pageq
.next
);
7236 if (hibernate_page_bittst(page_list
, VM_PAGE_GET_PHYS_PAGE(m
)))
7239 count_discard_purgeable
++;
7241 count_discard_inactive
++;
7242 hibernate_discard_page(m
);
7246 /* XXX FBDP TODO: secluded queue */
7248 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_active
);
7249 while (m
&& !vm_page_queue_end(&vm_page_queue_active
, (vm_page_queue_entry_t
)m
))
7251 assert(m
->vm_page_q_state
== VM_PAGE_ON_ACTIVE_Q
);
7253 next
= (vm_page_t
) VM_PAGE_UNPACK_PTR(m
->pageq
.next
);
7254 if (hibernate_page_bittst(page_list
, VM_PAGE_GET_PHYS_PAGE(m
)))
7257 count_discard_purgeable
++;
7259 count_discard_active
++;
7260 hibernate_discard_page(m
);
7265 m
= (vm_page_t
) vm_page_queue_first(&vm_page_queue_cleaned
);
7266 while (m
&& !vm_page_queue_end(&vm_page_queue_cleaned
, (vm_page_queue_entry_t
)m
))
7268 assert(m
->vm_page_q_state
== VM_PAGE_ON_INACTIVE_CLEANED_Q
);
7270 next
= (vm_page_t
) VM_PAGE_UNPACK_PTR(m
->pageq
.next
);
7271 if (hibernate_page_bittst(page_list
, VM_PAGE_GET_PHYS_PAGE(m
)))
7274 count_discard_purgeable
++;
7276 count_discard_cleaned
++;
7277 hibernate_discard_page(m
);
7282 #if MACH_ASSERT || DEBUG
7283 if (vm_page_local_q
) {
7284 for (i
= 0; i
< vm_page_local_q_count
; i
++) {
7286 lq
= &vm_page_local_q
[i
].vpl_un
.vpl
;
7287 VPL_UNLOCK(&lq
->vpl_lock
);
7290 vm_page_unlock_queues();
7291 #endif /* MACH_ASSERT || DEBUG */
7293 clock_get_uptime(&end
);
7294 absolutetime_to_nanoseconds(end
- start
, &nsec
);
7295 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
7297 count_discard_active
, count_discard_inactive
, count_discard_purgeable
, count_discard_speculative
, count_discard_cleaned
);
7300 boolean_t hibernate_paddr_map_inited
= FALSE
;
7301 unsigned int hibernate_teardown_last_valid_compact_indx
= -1;
7302 vm_page_t hibernate_rebuild_hash_list
= NULL
;
7304 unsigned int hibernate_teardown_found_tabled_pages
= 0;
7305 unsigned int hibernate_teardown_found_created_pages
= 0;
7306 unsigned int hibernate_teardown_found_free_pages
= 0;
7307 unsigned int hibernate_teardown_vm_page_free_count
;
7310 struct ppnum_mapping
{
7311 struct ppnum_mapping
*ppnm_next
;
7312 ppnum_t ppnm_base_paddr
;
7313 unsigned int ppnm_sindx
;
7314 unsigned int ppnm_eindx
;
7317 struct ppnum_mapping
*ppnm_head
;
7318 struct ppnum_mapping
*ppnm_last_found
= NULL
;
7322 hibernate_create_paddr_map()
7325 ppnum_t next_ppnum_in_run
= 0;
7326 struct ppnum_mapping
*ppnm
= NULL
;
7328 if (hibernate_paddr_map_inited
== FALSE
) {
7330 for (i
= 0; i
< vm_pages_count
; i
++) {
7333 ppnm
->ppnm_eindx
= i
;
7335 if (ppnm
== NULL
|| VM_PAGE_GET_PHYS_PAGE(&vm_pages
[i
]) != next_ppnum_in_run
) {
7337 ppnm
= kalloc(sizeof(struct ppnum_mapping
));
7339 ppnm
->ppnm_next
= ppnm_head
;
7342 ppnm
->ppnm_sindx
= i
;
7343 ppnm
->ppnm_base_paddr
= VM_PAGE_GET_PHYS_PAGE(&vm_pages
[i
]);
7345 next_ppnum_in_run
= VM_PAGE_GET_PHYS_PAGE(&vm_pages
[i
]) + 1;
7349 hibernate_paddr_map_inited
= TRUE
;
7354 hibernate_lookup_paddr(unsigned int indx
)
7356 struct ppnum_mapping
*ppnm
= NULL
;
7358 ppnm
= ppnm_last_found
;
7361 if (indx
>= ppnm
->ppnm_sindx
&& indx
< ppnm
->ppnm_eindx
)
7364 for (ppnm
= ppnm_head
; ppnm
; ppnm
= ppnm
->ppnm_next
) {
7366 if (indx
>= ppnm
->ppnm_sindx
&& indx
< ppnm
->ppnm_eindx
) {
7367 ppnm_last_found
= ppnm
;
7372 panic("hibernate_lookup_paddr of %d failed\n", indx
);
7374 return (ppnm
->ppnm_base_paddr
+ (indx
- ppnm
->ppnm_sindx
));
7379 hibernate_mark_as_unneeded(addr64_t saddr
, addr64_t eaddr
, hibernate_page_list_t
*page_list
, hibernate_page_list_t
*page_list_wired
)
7381 addr64_t saddr_aligned
;
7382 addr64_t eaddr_aligned
;
7385 unsigned int mark_as_unneeded_pages
= 0;
7387 saddr_aligned
= (saddr
+ PAGE_MASK_64
) & ~PAGE_MASK_64
;
7388 eaddr_aligned
= eaddr
& ~PAGE_MASK_64
;
7390 for (addr
= saddr_aligned
; addr
< eaddr_aligned
; addr
+= PAGE_SIZE_64
) {
7392 paddr
= pmap_find_phys(kernel_pmap
, addr
);
7396 hibernate_page_bitset(page_list
, TRUE
, paddr
);
7397 hibernate_page_bitset(page_list_wired
, TRUE
, paddr
);
7399 mark_as_unneeded_pages
++;
7401 return (mark_as_unneeded_pages
);
7406 hibernate_hash_insert_page(vm_page_t mem
)
7408 vm_page_bucket_t
*bucket
;
7410 vm_object_t m_object
;
7412 m_object
= VM_PAGE_OBJECT(mem
);
7414 assert(mem
->hashed
);
7416 assert(mem
->offset
!= (vm_object_offset_t
) -1);
7419 * Insert it into the object_object/offset hash table
7421 hash_id
= vm_page_hash(m_object
, mem
->offset
);
7422 bucket
= &vm_page_buckets
[hash_id
];
7424 mem
->next_m
= bucket
->page_list
;
7425 bucket
->page_list
= VM_PAGE_PACK_PTR(mem
);
7430 hibernate_free_range(int sindx
, int eindx
)
7435 while (sindx
< eindx
) {
7436 mem
= &vm_pages
[sindx
];
7438 vm_page_init(mem
, hibernate_lookup_paddr(sindx
), FALSE
);
7440 mem
->lopage
= FALSE
;
7441 mem
->vm_page_q_state
= VM_PAGE_ON_FREE_Q
;
7443 color
= VM_PAGE_GET_COLOR(mem
);
7444 #if defined(__x86_64__)
7445 vm_page_queue_enter_clump(&vm_page_queue_free
[color
].qhead
,
7450 vm_page_queue_enter(&vm_page_queue_free
[color
].qhead
,
7455 vm_page_free_count
++;
7462 extern void hibernate_rebuild_pmap_structs(void);
7465 hibernate_rebuild_vm_structs(void)
7467 int i
, cindx
, sindx
, eindx
;
7468 vm_page_t mem
, tmem
, mem_next
;
7469 AbsoluteTime startTime
, endTime
;
7472 if (hibernate_rebuild_needed
== FALSE
)
7475 KDBG(IOKDBG_CODE(DBG_HIBERNATE
, 13) | DBG_FUNC_START
);
7476 HIBLOG("hibernate_rebuild started\n");
7478 clock_get_uptime(&startTime
);
7480 hibernate_rebuild_pmap_structs();
7482 bzero(&vm_page_buckets
[0], vm_page_bucket_count
* sizeof(vm_page_bucket_t
));
7483 eindx
= vm_pages_count
;
7486 * Mark all the vm_pages[] that have not been initialized yet as being
7487 * transient. This is needed to ensure that buddy page search is corrrect.
7488 * Without this random data in these vm_pages[] can trip the buddy search
7490 for (i
= hibernate_teardown_last_valid_compact_indx
+1; i
< eindx
; ++i
)
7491 vm_pages
[i
].vm_page_q_state
= VM_PAGE_NOT_ON_Q
;
7493 for (cindx
= hibernate_teardown_last_valid_compact_indx
; cindx
>= 0; cindx
--) {
7495 mem
= &vm_pages
[cindx
];
7496 assert(mem
->vm_page_q_state
!= VM_PAGE_ON_FREE_Q
);
7498 * hibernate_teardown_vm_structs leaves the location where
7499 * this vm_page_t must be located in "next".
7501 tmem
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(mem
->next_m
));
7502 mem
->next_m
= VM_PAGE_PACK_PTR(NULL
);
7504 sindx
= (int)(tmem
- &vm_pages
[0]);
7508 * this vm_page_t was moved by hibernate_teardown_vm_structs,
7509 * so move it back to its real location
7515 hibernate_hash_insert_page(mem
);
7517 * the 'hole' between this vm_page_t and the previous
7518 * vm_page_t we moved needs to be initialized as
7519 * a range of free vm_page_t's
7521 hibernate_free_range(sindx
+ 1, eindx
);
7526 hibernate_free_range(0, sindx
);
7528 assert(vm_page_free_count
== hibernate_teardown_vm_page_free_count
);
7531 * process the list of vm_page_t's that were entered in the hash,
7532 * but were not located in the vm_pages arrary... these are
7533 * vm_page_t's that were created on the fly (i.e. fictitious)
7535 for (mem
= hibernate_rebuild_hash_list
; mem
; mem
= mem_next
) {
7536 mem_next
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(mem
->next_m
));
7539 hibernate_hash_insert_page(mem
);
7541 hibernate_rebuild_hash_list
= NULL
;
7543 clock_get_uptime(&endTime
);
7544 SUB_ABSOLUTETIME(&endTime
, &startTime
);
7545 absolutetime_to_nanoseconds(endTime
, &nsec
);
7547 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec
/ 1000000ULL);
7549 hibernate_rebuild_needed
= FALSE
;
7551 KDBG(IOKDBG_CODE(DBG_HIBERNATE
, 13) | DBG_FUNC_END
);
7555 extern void hibernate_teardown_pmap_structs(addr64_t
*, addr64_t
*);
7558 hibernate_teardown_vm_structs(hibernate_page_list_t
*page_list
, hibernate_page_list_t
*page_list_wired
)
7561 unsigned int compact_target_indx
;
7562 vm_page_t mem
, mem_next
;
7563 vm_page_bucket_t
*bucket
;
7564 unsigned int mark_as_unneeded_pages
= 0;
7565 unsigned int unneeded_vm_page_bucket_pages
= 0;
7566 unsigned int unneeded_vm_pages_pages
= 0;
7567 unsigned int unneeded_pmap_pages
= 0;
7568 addr64_t start_of_unneeded
= 0;
7569 addr64_t end_of_unneeded
= 0;
7572 if (hibernate_should_abort())
7575 hibernate_rebuild_needed
= TRUE
;
7577 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
7578 vm_page_wire_count
, vm_page_free_count
, vm_page_active_count
, vm_page_inactive_count
, vm_page_speculative_count
,
7579 vm_page_cleaned_count
, compressor_object
->resident_page_count
);
7581 for (i
= 0; i
< vm_page_bucket_count
; i
++) {
7583 bucket
= &vm_page_buckets
[i
];
7585 for (mem
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(bucket
->page_list
)); mem
!= VM_PAGE_NULL
; mem
= mem_next
) {
7586 assert(mem
->hashed
);
7588 mem_next
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(mem
->next_m
));
7590 if (mem
< &vm_pages
[0] || mem
>= &vm_pages
[vm_pages_count
]) {
7591 mem
->next_m
= VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list
);
7592 hibernate_rebuild_hash_list
= mem
;
7596 unneeded_vm_page_bucket_pages
= hibernate_mark_as_unneeded((addr64_t
)&vm_page_buckets
[0], (addr64_t
)&vm_page_buckets
[vm_page_bucket_count
], page_list
, page_list_wired
);
7597 mark_as_unneeded_pages
+= unneeded_vm_page_bucket_pages
;
7599 hibernate_teardown_vm_page_free_count
= vm_page_free_count
;
7601 compact_target_indx
= 0;
7603 for (i
= 0; i
< vm_pages_count
; i
++) {
7607 if (mem
->vm_page_q_state
== VM_PAGE_ON_FREE_Q
) {
7611 assert(!mem
->lopage
);
7613 color
= VM_PAGE_GET_COLOR(mem
);
7615 vm_page_queue_remove(&vm_page_queue_free
[color
].qhead
,
7620 VM_PAGE_ZERO_PAGEQ_ENTRY(mem
);
7622 vm_page_free_count
--;
7624 hibernate_teardown_found_free_pages
++;
7626 if (vm_pages
[compact_target_indx
].vm_page_q_state
!= VM_PAGE_ON_FREE_Q
)
7627 compact_target_indx
= i
;
7630 * record this vm_page_t's original location
7631 * we need this even if it doesn't get moved
7632 * as an indicator to the rebuild function that
7633 * we don't have to move it
7635 mem
->next_m
= VM_PAGE_PACK_PTR(mem
);
7637 if (vm_pages
[compact_target_indx
].vm_page_q_state
== VM_PAGE_ON_FREE_Q
) {
7639 * we've got a hole to fill, so
7640 * move this vm_page_t to it's new home
7642 vm_pages
[compact_target_indx
] = *mem
;
7643 mem
->vm_page_q_state
= VM_PAGE_ON_FREE_Q
;
7645 hibernate_teardown_last_valid_compact_indx
= compact_target_indx
;
7646 compact_target_indx
++;
7648 hibernate_teardown_last_valid_compact_indx
= i
;
7651 unneeded_vm_pages_pages
= hibernate_mark_as_unneeded((addr64_t
)&vm_pages
[hibernate_teardown_last_valid_compact_indx
+1],
7652 (addr64_t
)&vm_pages
[vm_pages_count
-1], page_list
, page_list_wired
);
7653 mark_as_unneeded_pages
+= unneeded_vm_pages_pages
;
7655 hibernate_teardown_pmap_structs(&start_of_unneeded
, &end_of_unneeded
);
7657 if (start_of_unneeded
) {
7658 unneeded_pmap_pages
= hibernate_mark_as_unneeded(start_of_unneeded
, end_of_unneeded
, page_list
, page_list_wired
);
7659 mark_as_unneeded_pages
+= unneeded_pmap_pages
;
7661 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages
, unneeded_vm_pages_pages
, unneeded_pmap_pages
);
7663 return (mark_as_unneeded_pages
);
7667 #endif /* HIBERNATION */
7669 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
7671 #include <mach_vm_debug.h>
7674 #include <mach_debug/hash_info.h>
7675 #include <vm/vm_debug.h>
7678 * Routine: vm_page_info
7680 * Return information about the global VP table.
7681 * Fills the buffer with as much information as possible
7682 * and returns the desired size of the buffer.
7684 * Nothing locked. The caller should provide
7685 * possibly-pageable memory.
7690 hash_info_bucket_t
*info
,
7694 lck_spin_t
*bucket_lock
;
7696 if (vm_page_bucket_count
< count
)
7697 count
= vm_page_bucket_count
;
7699 for (i
= 0; i
< count
; i
++) {
7700 vm_page_bucket_t
*bucket
= &vm_page_buckets
[i
];
7701 unsigned int bucket_count
= 0;
7704 bucket_lock
= &vm_page_bucket_locks
[i
/ BUCKETS_PER_LOCK
];
7705 lck_spin_lock(bucket_lock
);
7707 for (m
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(bucket
->page_list
));
7709 m
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(m
->next_m
)))
7712 lck_spin_unlock(bucket_lock
);
7714 /* don't touch pageable memory while holding locks */
7715 info
[i
].hib_count
= bucket_count
;
7718 return vm_page_bucket_count
;
7720 #endif /* MACH_VM_DEBUG */
7722 #if VM_PAGE_BUCKETS_CHECK
7724 vm_page_buckets_check(void)
7728 unsigned int p_hash
;
7729 vm_page_bucket_t
*bucket
;
7730 lck_spin_t
*bucket_lock
;
7732 if (!vm_page_buckets_check_ready
) {
7737 if (hibernate_rebuild_needed
||
7738 hibernate_rebuild_hash_list
) {
7739 panic("BUCKET_CHECK: hibernation in progress: "
7740 "rebuild_needed=%d rebuild_hash_list=%p\n",
7741 hibernate_rebuild_needed
,
7742 hibernate_rebuild_hash_list
);
7744 #endif /* HIBERNATION */
7746 #if VM_PAGE_FAKE_BUCKETS
7748 for (cp
= (char *) vm_page_fake_buckets_start
;
7749 cp
< (char *) vm_page_fake_buckets_end
;
7752 panic("BUCKET_CHECK: corruption at %p in fake buckets "
7753 "[0x%llx:0x%llx]\n",
7755 (uint64_t) vm_page_fake_buckets_start
,
7756 (uint64_t) vm_page_fake_buckets_end
);
7759 #endif /* VM_PAGE_FAKE_BUCKETS */
7761 for (i
= 0; i
< vm_page_bucket_count
; i
++) {
7762 vm_object_t p_object
;
7764 bucket
= &vm_page_buckets
[i
];
7765 if (!bucket
->page_list
) {
7769 bucket_lock
= &vm_page_bucket_locks
[i
/ BUCKETS_PER_LOCK
];
7770 lck_spin_lock(bucket_lock
);
7771 p
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(bucket
->page_list
));
7773 while (p
!= VM_PAGE_NULL
) {
7774 p_object
= VM_PAGE_OBJECT(p
);
7777 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
7778 "hash %d in bucket %d at %p "
7780 p
, p_object
, p
->offset
,
7783 p_hash
= vm_page_hash(p_object
, p
->offset
);
7785 panic("BUCKET_CHECK: corruption in bucket %d "
7786 "at %p: page %p object %p offset 0x%llx "
7788 i
, bucket
, p
, p_object
, p
->offset
,
7791 p
= (vm_page_t
)(VM_PAGE_UNPACK_PTR(p
->next_m
));
7793 lck_spin_unlock(bucket_lock
);
7796 // printf("BUCKET_CHECK: checked buckets\n");
7798 #endif /* VM_PAGE_BUCKETS_CHECK */
7801 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
7802 * local queues if they exist... its the only spot in the system where we add pages
7803 * to those queues... once on those queues, those pages can only move to one of the
7804 * global page queues or the free queues... they NEVER move from local q to local q.
7805 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
7806 * the global vm_page_queue_lock at this point... we still need to take the local lock
7807 * in case this operation is being run on a different CPU then the local queue's identity,
7808 * but we don't have to worry about the page moving to a global queue or becoming wired
7809 * while we're grabbing the local lock since those operations would require the global
7810 * vm_page_queue_lock to be held, and we already own it.
7812 * this is why its safe to utilze the wire_count field in the vm_page_t as the local_id...
7813 * 'wired' and local are ALWAYS mutually exclusive conditions.
7816 #if CONFIG_BACKGROUND_QUEUE
7818 vm_page_queues_remove(vm_page_t mem
, boolean_t remove_from_backgroundq
)
7821 vm_page_queues_remove(vm_page_t mem
, boolean_t __unused remove_from_backgroundq
)
7824 boolean_t was_pageable
= TRUE
;
7825 vm_object_t m_object
;
7827 m_object
= VM_PAGE_OBJECT(mem
);
7829 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
7831 if (mem
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
)
7833 assert(mem
->pageq
.next
== 0 && mem
->pageq
.prev
== 0);
7834 #if CONFIG_BACKGROUND_QUEUE
7835 if (remove_from_backgroundq
== TRUE
) {
7836 vm_page_remove_from_backgroundq(mem
);
7838 if (mem
->vm_page_on_backgroundq
) {
7839 assert(mem
->vm_page_backgroundq
.next
!= 0);
7840 assert(mem
->vm_page_backgroundq
.prev
!= 0);
7842 assert(mem
->vm_page_backgroundq
.next
== 0);
7843 assert(mem
->vm_page_backgroundq
.prev
== 0);
7845 #endif /* CONFIG_BACKGROUND_QUEUE */
7849 if (mem
->vm_page_q_state
== VM_PAGE_USED_BY_COMPRESSOR
)
7851 assert(mem
->pageq
.next
== 0 && mem
->pageq
.prev
== 0);
7852 #if CONFIG_BACKGROUND_QUEUE
7853 assert(mem
->vm_page_backgroundq
.next
== 0 &&
7854 mem
->vm_page_backgroundq
.prev
== 0 &&
7855 mem
->vm_page_on_backgroundq
== FALSE
);
7859 if (mem
->vm_page_q_state
== VM_PAGE_IS_WIRED
) {
7861 * might put these guys on a list for debugging purposes
7862 * if we do, we'll need to remove this assert
7864 assert(mem
->pageq
.next
== 0 && mem
->pageq
.prev
== 0);
7865 #if CONFIG_BACKGROUND_QUEUE
7866 assert(mem
->vm_page_backgroundq
.next
== 0 &&
7867 mem
->vm_page_backgroundq
.prev
== 0 &&
7868 mem
->vm_page_on_backgroundq
== FALSE
);
7873 assert(m_object
!= compressor_object
);
7874 assert(m_object
!= kernel_object
);
7875 assert(m_object
!= vm_submap_object
);
7876 assert(!mem
->fictitious
);
7878 switch(mem
->vm_page_q_state
) {
7880 case VM_PAGE_ON_ACTIVE_LOCAL_Q
:
7884 lq
= &vm_page_local_q
[mem
->local_id
].vpl_un
.vpl
;
7885 VPL_LOCK(&lq
->vpl_lock
);
7886 vm_page_queue_remove(&lq
->vpl_queue
,
7887 mem
, vm_page_t
, pageq
);
7890 if (m_object
->internal
) {
7891 lq
->vpl_internal_count
--;
7893 lq
->vpl_external_count
--;
7895 VPL_UNLOCK(&lq
->vpl_lock
);
7896 was_pageable
= FALSE
;
7899 case VM_PAGE_ON_ACTIVE_Q
:
7901 vm_page_queue_remove(&vm_page_queue_active
,
7902 mem
, vm_page_t
, pageq
);
7903 vm_page_active_count
--;
7907 case VM_PAGE_ON_INACTIVE_INTERNAL_Q
:
7909 assert(m_object
->internal
== TRUE
);
7911 vm_page_inactive_count
--;
7912 vm_page_queue_remove(&vm_page_queue_anonymous
,
7913 mem
, vm_page_t
, pageq
);
7914 vm_page_anonymous_count
--;
7915 vm_purgeable_q_advance_all();
7919 case VM_PAGE_ON_INACTIVE_EXTERNAL_Q
:
7921 assert(m_object
->internal
== FALSE
);
7923 vm_page_inactive_count
--;
7924 vm_page_queue_remove(&vm_page_queue_inactive
,
7925 mem
, vm_page_t
, pageq
);
7926 vm_purgeable_q_advance_all();
7930 case VM_PAGE_ON_INACTIVE_CLEANED_Q
:
7932 assert(m_object
->internal
== FALSE
);
7934 vm_page_inactive_count
--;
7935 vm_page_queue_remove(&vm_page_queue_cleaned
,
7936 mem
, vm_page_t
, pageq
);
7937 vm_page_cleaned_count
--;
7941 case VM_PAGE_ON_THROTTLED_Q
:
7943 assert(m_object
->internal
== TRUE
);
7945 vm_page_queue_remove(&vm_page_queue_throttled
,
7946 mem
, vm_page_t
, pageq
);
7947 vm_page_throttled_count
--;
7948 was_pageable
= FALSE
;
7952 case VM_PAGE_ON_SPECULATIVE_Q
:
7954 assert(m_object
->internal
== FALSE
);
7956 vm_page_remque(&mem
->pageq
);
7957 vm_page_speculative_count
--;
7961 #if CONFIG_SECLUDED_MEMORY
7962 case VM_PAGE_ON_SECLUDED_Q
:
7964 vm_page_queue_remove(&vm_page_queue_secluded
,
7965 mem
, vm_page_t
, pageq
);
7966 vm_page_secluded_count
--;
7967 if (m_object
== VM_OBJECT_NULL
) {
7968 vm_page_secluded_count_free
--;
7969 was_pageable
= FALSE
;
7971 assert(!m_object
->internal
);
7972 vm_page_secluded_count_inuse
--;
7973 was_pageable
= FALSE
;
7974 // was_pageable = TRUE;
7978 #endif /* CONFIG_SECLUDED_MEMORY */
7983 * if (mem->vm_page_q_state == VM_PAGE_ON_PAGEOUT_Q)
7984 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
7985 * the caller is responsible for determing if the page is on that queue, and if so, must
7986 * either first remove it (it needs both the page queues lock and the object lock to do
7987 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
7989 * we also don't expect to encounter VM_PAGE_ON_FREE_Q, VM_PAGE_ON_FREE_LOCAL_Q, VM_PAGE_ON_FREE_LOPAGE_Q
7990 * or any of the undefined states
7992 panic("vm_page_queues_remove - bad page q_state (%p, %d)\n", mem
, mem
->vm_page_q_state
);
7997 VM_PAGE_ZERO_PAGEQ_ENTRY(mem
);
7998 mem
->vm_page_q_state
= VM_PAGE_NOT_ON_Q
;
8000 #if CONFIG_BACKGROUND_QUEUE
8001 if (remove_from_backgroundq
== TRUE
)
8002 vm_page_remove_from_backgroundq(mem
);
8005 if (m_object
->internal
) {
8006 vm_page_pageable_internal_count
--;
8008 vm_page_pageable_external_count
--;
8014 vm_page_remove_internal(vm_page_t page
)
8016 vm_object_t __object
= VM_PAGE_OBJECT(page
);
8017 if (page
== __object
->memq_hint
) {
8018 vm_page_t __new_hint
;
8019 vm_page_queue_entry_t __qe
;
8020 __qe
= (vm_page_queue_entry_t
)vm_page_queue_next(&page
->listq
);
8021 if (vm_page_queue_end(&__object
->memq
, __qe
)) {
8022 __qe
= (vm_page_queue_entry_t
)vm_page_queue_prev(&page
->listq
);
8023 if (vm_page_queue_end(&__object
->memq
, __qe
)) {
8027 __new_hint
= (vm_page_t
)((uintptr_t) __qe
);
8028 __object
->memq_hint
= __new_hint
;
8030 vm_page_queue_remove(&__object
->memq
, page
, vm_page_t
, listq
);
8031 #if CONFIG_SECLUDED_MEMORY
8032 if (__object
->eligible_for_secluded
) {
8033 vm_page_secluded
.eligible_for_secluded
--;
8035 #endif /* CONFIG_SECLUDED_MEMORY */
8039 vm_page_enqueue_inactive(vm_page_t mem
, boolean_t first
)
8041 vm_object_t m_object
;
8043 m_object
= VM_PAGE_OBJECT(mem
);
8045 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
8046 assert(!mem
->fictitious
);
8047 assert(!mem
->laundry
);
8048 assert(mem
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
);
8049 vm_page_check_pageable_safe(mem
);
8051 if (m_object
->internal
) {
8052 mem
->vm_page_q_state
= VM_PAGE_ON_INACTIVE_INTERNAL_Q
;
8055 vm_page_queue_enter_first(&vm_page_queue_anonymous
, mem
, vm_page_t
, pageq
);
8057 vm_page_queue_enter(&vm_page_queue_anonymous
, mem
, vm_page_t
, pageq
);
8059 vm_page_anonymous_count
++;
8060 vm_page_pageable_internal_count
++;
8062 mem
->vm_page_q_state
= VM_PAGE_ON_INACTIVE_EXTERNAL_Q
;
8065 vm_page_queue_enter_first(&vm_page_queue_inactive
, mem
, vm_page_t
, pageq
);
8067 vm_page_queue_enter(&vm_page_queue_inactive
, mem
, vm_page_t
, pageq
);
8069 vm_page_pageable_external_count
++;
8071 vm_page_inactive_count
++;
8072 token_new_pagecount
++;
8074 #if CONFIG_BACKGROUND_QUEUE
8075 if (mem
->vm_page_in_background
)
8076 vm_page_add_to_backgroundq(mem
, FALSE
);
8081 vm_page_enqueue_active(vm_page_t mem
, boolean_t first
)
8083 vm_object_t m_object
;
8085 m_object
= VM_PAGE_OBJECT(mem
);
8087 LCK_MTX_ASSERT(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
8088 assert(!mem
->fictitious
);
8089 assert(!mem
->laundry
);
8090 assert(mem
->vm_page_q_state
== VM_PAGE_NOT_ON_Q
);
8091 vm_page_check_pageable_safe(mem
);
8093 mem
->vm_page_q_state
= VM_PAGE_ON_ACTIVE_Q
;
8095 vm_page_queue_enter_first(&vm_page_queue_active
, mem
, vm_page_t
, pageq
);
8097 vm_page_queue_enter(&vm_page_queue_active
, mem
, vm_page_t
, pageq
);
8098 vm_page_active_count
++;
8100 if (m_object
->internal
) {
8101 vm_page_pageable_internal_count
++;
8103 vm_page_pageable_external_count
++;
8106 #if CONFIG_BACKGROUND_QUEUE
8107 if (mem
->vm_page_in_background
)
8108 vm_page_add_to_backgroundq(mem
, FALSE
);
8113 * Pages from special kernel objects shouldn't
8114 * be placed on pageable queues.
8117 vm_page_check_pageable_safe(vm_page_t page
)
8119 vm_object_t page_object
;
8121 page_object
= VM_PAGE_OBJECT(page
);
8123 if (page_object
== kernel_object
) {
8124 panic("vm_page_check_pageable_safe: trying to add page" \
8125 "from kernel object (%p) to pageable queue", kernel_object
);
8128 if (page_object
== compressor_object
) {
8129 panic("vm_page_check_pageable_safe: trying to add page" \
8130 "from compressor object (%p) to pageable queue", compressor_object
);
8133 if (page_object
== vm_submap_object
) {
8134 panic("vm_page_check_pageable_safe: trying to add page" \
8135 "from submap object (%p) to pageable queue", vm_submap_object
);
8139 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
8140 * wired page diagnose
8141 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
8143 #include <libkern/OSKextLibPrivate.h>
8145 #define KA_SIZE(namelen, subtotalscount) \
8146 (sizeof(struct vm_allocation_site) + (namelen) + 1 + ((subtotalscount) * sizeof(struct vm_allocation_total)))
8148 #define KA_NAME(alloc) \
8149 ((char *)(&(alloc)->subtotals[(alloc->subtotalscount)]))
8151 #define KA_NAME_LEN(alloc) \
8152 (VM_TAG_NAME_LEN_MAX & (alloc->flags >> VM_TAG_NAME_LEN_SHIFT))
8157 uintptr_t* frameptr
;
8158 uintptr_t* frameptr_next
;
8160 uintptr_t kstackb
, kstackt
;
8161 const vm_allocation_site_t
* site
;
8163 kern_allocation_name_t name
;
8165 cthread
= current_thread();
8166 if (__improbable(cthread
== NULL
)) return VM_KERN_MEMORY_OSFMK
;
8168 if ((name
= thread_get_kernel_state(cthread
)->allocation_name
))
8170 if (!name
->tag
) vm_tag_alloc(name
);
8174 kstackb
= cthread
->kernel_stack
;
8175 kstackt
= kstackb
+ kernel_stack_size
;
8177 /* Load stack frame pointer (EBP on x86) into frameptr */
8178 frameptr
= __builtin_frame_address(0);
8180 while (frameptr
!= NULL
)
8182 /* Verify thread stack bounds */
8183 if (((uintptr_t)(frameptr
+ 2) > kstackt
) || ((uintptr_t)frameptr
< kstackb
)) break;
8185 /* Next frame pointer is pointed to by the previous one */
8186 frameptr_next
= (uintptr_t*) *frameptr
;
8188 /* Pull return address from one spot above the frame pointer */
8189 retaddr
= *(frameptr
+ 1);
8192 if ((retaddr
< vm_kernel_stext
) || (retaddr
> vm_kernel_top
))
8194 site
= OSKextGetAllocationSiteForCaller(retaddr
);
8197 frameptr
= frameptr_next
;
8200 return (site
? site
->tag
: VM_KERN_MEMORY_NONE
);
8203 static uint64_t free_tag_bits
[VM_MAX_TAG_VALUE
/64];
8206 vm_tag_alloc_locked(vm_allocation_site_t
* site
, vm_allocation_site_t
** releasesiteP
)
8211 vm_allocation_site_t
* prev
;
8213 if (site
->tag
) return;
8218 avail
= free_tag_bits
[idx
];
8221 tag
= __builtin_clzll(avail
);
8222 avail
&= ~(1ULL << (63 - tag
));
8223 free_tag_bits
[idx
] = avail
;
8228 if (idx
>= ARRAY_COUNT(free_tag_bits
))
8230 for (idx
= 0; idx
< ARRAY_COUNT(vm_allocation_sites
); idx
++)
8232 prev
= vm_allocation_sites
[idx
];
8233 if (!prev
) continue;
8234 if (!KA_NAME_LEN(prev
)) continue;
8235 if (!prev
->tag
) continue;
8236 if (prev
->total
) continue;
8237 if (1 != prev
->refcount
) continue;
8239 assert(idx
== prev
->tag
);
8241 prev
->tag
= VM_KERN_MEMORY_NONE
;
8242 *releasesiteP
= prev
;
8245 if (idx
>= ARRAY_COUNT(vm_allocation_sites
))
8247 tag
= VM_KERN_MEMORY_ANY
;
8254 OSAddAtomic16(1, &site
->refcount
);
8256 if (VM_KERN_MEMORY_ANY
!= tag
) vm_allocation_sites
[tag
] = site
;
8258 if (tag
> vm_allocation_tag_highest
) vm_allocation_tag_highest
= tag
;
8262 vm_tag_free_locked(vm_tag_t tag
)
8268 if (VM_KERN_MEMORY_ANY
== tag
) return;
8271 avail
= free_tag_bits
[idx
];
8273 bit
= (1ULL << (63 - tag
));
8274 assert(!(avail
& bit
));
8275 free_tag_bits
[idx
] = (avail
| bit
);
8282 for (tag
= VM_KERN_MEMORY_FIRST_DYNAMIC
; tag
< VM_KERN_MEMORY_ANY
; tag
++)
8284 vm_tag_free_locked(tag
);
8287 for (tag
= VM_KERN_MEMORY_ANY
+ 1; tag
< VM_MAX_TAG_VALUE
; tag
++)
8289 vm_tag_free_locked(tag
);
8294 vm_tag_alloc(vm_allocation_site_t
* site
)
8297 vm_allocation_site_t
* releasesite
;
8299 if (VM_TAG_BT
& site
->flags
)
8302 if (VM_KERN_MEMORY_NONE
!= tag
) return (tag
);
8308 lck_spin_lock(&vm_allocation_sites_lock
);
8309 vm_tag_alloc_locked(site
, &releasesite
);
8310 lck_spin_unlock(&vm_allocation_sites_lock
);
8311 if (releasesite
) kern_allocation_name_release(releasesite
);
8318 vm_tag_update_size(vm_tag_t tag
, int64_t delta
)
8320 vm_allocation_site_t
* allocation
;
8323 assert(VM_KERN_MEMORY_NONE
!= tag
);
8324 assert(tag
< VM_MAX_TAG_VALUE
);
8326 allocation
= vm_allocation_sites
[tag
];
8330 assertf(allocation
->total
>= ((uint64_t)-delta
), "tag %d, site %p", tag
, allocation
);
8332 prior
= OSAddAtomic64(delta
, &allocation
->total
);
8334 #if DEBUG || DEVELOPMENT
8337 new = prior
+ delta
;
8340 peak
= allocation
->peak
;
8341 if (new <= peak
) break;
8343 while (!OSCompareAndSwap64(peak
, new, &allocation
->peak
));
8345 #endif /* DEBUG || DEVELOPMENT */
8347 if (tag
< VM_KERN_MEMORY_FIRST_DYNAMIC
) return;
8349 if (!prior
&& !allocation
->tag
) vm_tag_alloc(allocation
);
8353 kern_allocation_update_size(kern_allocation_name_t allocation
, int64_t delta
)
8358 assertf(allocation
->total
>= ((uint64_t)-delta
), "name %p", allocation
);
8360 prior
= OSAddAtomic64(delta
, &allocation
->total
);
8362 #if DEBUG || DEVELOPMENT
8365 new = prior
+ delta
;
8368 peak
= allocation
->peak
;
8369 if (new <= peak
) break;
8371 while (!OSCompareAndSwap64(peak
, new, &allocation
->peak
));
8373 #endif /* DEBUG || DEVELOPMENT */
8375 if (!prior
&& !allocation
->tag
) vm_tag_alloc(allocation
);
8378 #if VM_MAX_TAG_ZONES
8381 vm_allocation_zones_init(void)
8387 size
= VM_MAX_TAG_VALUE
* sizeof(vm_allocation_zone_total_t
**)
8388 + 2 * VM_MAX_TAG_ZONES
* sizeof(vm_allocation_zone_total_t
);
8390 ret
= kernel_memory_allocate(kernel_map
,
8391 &addr
, round_page(size
), 0,
8392 KMA_ZERO
, VM_KERN_MEMORY_DIAG
);
8393 assert(KERN_SUCCESS
== ret
);
8395 vm_allocation_zone_totals
= (vm_allocation_zone_total_t
**) addr
;
8396 addr
+= VM_MAX_TAG_VALUE
* sizeof(vm_allocation_zone_total_t
**);
8398 // prepopulate VM_KERN_MEMORY_DIAG & VM_KERN_MEMORY_KALLOC so allocations
8399 // in vm_tag_update_zone_size() won't recurse
8400 vm_allocation_zone_totals
[VM_KERN_MEMORY_DIAG
] = (vm_allocation_zone_total_t
*) addr
;
8401 addr
+= VM_MAX_TAG_ZONES
* sizeof(vm_allocation_zone_total_t
);
8402 vm_allocation_zone_totals
[VM_KERN_MEMORY_KALLOC
] = (vm_allocation_zone_total_t
*) addr
;
8406 vm_tag_will_update_zone(vm_tag_t tag
, uint32_t zidx
)
8408 vm_allocation_zone_total_t
* zone
;
8410 assert(VM_KERN_MEMORY_NONE
!= tag
);
8411 assert(tag
< VM_MAX_TAG_VALUE
);
8413 if (zidx
>= VM_MAX_TAG_ZONES
) return;
8415 zone
= vm_allocation_zone_totals
[tag
];
8418 zone
= kalloc_tag(VM_MAX_TAG_ZONES
* sizeof(*zone
), VM_KERN_MEMORY_DIAG
);
8420 bzero(zone
, VM_MAX_TAG_ZONES
* sizeof(*zone
));
8421 if (!OSCompareAndSwapPtr(NULL
, zone
, &vm_allocation_zone_totals
[tag
]))
8423 kfree(zone
, VM_MAX_TAG_ZONES
* sizeof(*zone
));
8429 vm_tag_update_zone_size(vm_tag_t tag
, uint32_t zidx
, int64_t delta
, int64_t dwaste
)
8431 vm_allocation_zone_total_t
* zone
;
8434 assert(VM_KERN_MEMORY_NONE
!= tag
);
8435 assert(tag
< VM_MAX_TAG_VALUE
);
8437 if (zidx
>= VM_MAX_TAG_ZONES
) return;
8439 zone
= vm_allocation_zone_totals
[tag
];
8443 /* the zone is locked */
8446 assertf(zone
->total
>= ((uint64_t)-delta
), "zidx %d, tag %d, %p", zidx
, tag
, zone
);
8447 zone
->total
+= delta
;
8451 zone
->total
+= delta
;
8452 if (zone
->total
> zone
->peak
) zone
->peak
= zone
->total
;
8456 if (zone
->wastediv
< 65536) zone
->wastediv
++;
8457 else new -= (new >> 16);
8458 __assert_only
bool ov
= os_add_overflow(new, dwaste
, &new);
8465 #endif /* VM_MAX_TAG_ZONES */
8468 kern_allocation_update_subtotal(kern_allocation_name_t allocation
, uint32_t subtag
, int64_t delta
)
8470 kern_allocation_name_t other
;
8471 struct vm_allocation_total
* total
;
8475 assert(VM_KERN_MEMORY_NONE
!= subtag
);
8476 for (; subidx
< allocation
->subtotalscount
; subidx
++)
8478 if (VM_KERN_MEMORY_NONE
== allocation
->subtotals
[subidx
].tag
)
8480 allocation
->subtotals
[subidx
].tag
= subtag
;
8483 if (subtag
== allocation
->subtotals
[subidx
].tag
) break;
8485 assert(subidx
< allocation
->subtotalscount
);
8486 if (subidx
>= allocation
->subtotalscount
) return;
8488 total
= &allocation
->subtotals
[subidx
];
8489 other
= vm_allocation_sites
[subtag
];
8494 assertf(total
->total
>= ((uint64_t)-delta
), "name %p", allocation
);
8495 OSAddAtomic64(delta
, &total
->total
);
8496 assertf(other
->mapped
>= ((uint64_t)-delta
), "other %p", other
);
8497 OSAddAtomic64(delta
, &other
->mapped
);
8501 OSAddAtomic64(delta
, &other
->mapped
);
8502 OSAddAtomic64(delta
, &total
->total
);
8507 kern_allocation_get_name(kern_allocation_name_t allocation
)
8509 return (KA_NAME(allocation
));
8512 kern_allocation_name_t
8513 kern_allocation_name_allocate(const char * name
, uint32_t subtotalscount
)
8517 namelen
= (uint32_t) strnlen(name
, MACH_MEMORY_INFO_NAME_MAX_LEN
- 1);
8519 kern_allocation_name_t allocation
;
8520 allocation
= kalloc(KA_SIZE(namelen
, subtotalscount
));
8521 bzero(allocation
, KA_SIZE(namelen
, subtotalscount
));
8523 allocation
->refcount
= 1;
8524 allocation
->subtotalscount
= subtotalscount
;
8525 allocation
->flags
= (namelen
<< VM_TAG_NAME_LEN_SHIFT
);
8526 strlcpy(KA_NAME(allocation
), name
, namelen
+ 1);
8528 return (allocation
);
8532 kern_allocation_name_release(kern_allocation_name_t allocation
)
8534 assert(allocation
->refcount
> 0);
8535 if (1 == OSAddAtomic16(-1, &allocation
->refcount
))
8537 kfree(allocation
, KA_SIZE(KA_NAME_LEN(allocation
), allocation
->subtotalscount
));
8542 kern_allocation_name_get_vm_tag(kern_allocation_name_t allocation
)
8544 return (vm_tag_alloc(allocation
));
8548 vm_page_count_object(mach_memory_info_t
* info
, unsigned int __unused num_info
, vm_object_t object
)
8550 if (!object
->wired_page_count
) return;
8551 if (object
!= kernel_object
)
8553 assert(object
->wire_tag
< num_info
);
8554 info
[object
->wire_tag
].size
+= ptoa_64(object
->wired_page_count
);
8558 typedef void (*vm_page_iterate_proc
)(mach_memory_info_t
* info
,
8559 unsigned int num_info
, vm_object_t object
);
8562 vm_page_iterate_purgeable_objects(mach_memory_info_t
* info
, unsigned int num_info
,
8563 vm_page_iterate_proc proc
, purgeable_q_t queue
,
8568 for (object
= (vm_object_t
) queue_first(&queue
->objq
[group
]);
8569 !queue_end(&queue
->objq
[group
], (queue_entry_t
) object
);
8570 object
= (vm_object_t
) queue_next(&object
->objq
))
8572 proc(info
, num_info
, object
);
static void
vm_page_iterate_objects(mach_memory_info_t * info, unsigned int num_info,
                        vm_page_iterate_proc proc)
{
    purgeable_q_t  volatile_q;
    queue_head_t * nonvolatile_q;
    vm_object_t    object;
    int            group;

    lck_spin_lock(&vm_objects_wired_lock);
    queue_iterate(&vm_objects_wired,
                  object,
                  vm_object_t,
                  objq)
    {
        proc(info, num_info, object);
    }
    lck_spin_unlock(&vm_objects_wired_lock);

    lck_mtx_lock(&vm_purgeable_queue_lock);
    nonvolatile_q = &purgeable_nonvolatile_queue;
    for (object = (vm_object_t) queue_first(nonvolatile_q);
         !queue_end(nonvolatile_q, (queue_entry_t) object);
         object = (vm_object_t) queue_next(&object->objq))
    {
        proc(info, num_info, object);
    }

    volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
    vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, 0);

    volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
    for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
    {
        vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, group);
    }

    volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
    for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
    {
        vm_page_iterate_purgeable_objects(info, num_info, proc, volatile_q, group);
    }
    lck_mtx_unlock(&vm_purgeable_queue_lock);
}
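
/*
 * Fold vm_allocation_sites[] into the caller's info array: copy (or, when
 * page counts were iterated, reconcile) each site's totals, mark entries as
 * fixed-tag / named / kmod / kernel sites, expand per-zone totals into
 * additional entries, and move the mapped cost of named sites' subtotals
 * from the subsidiary tags onto the owning entry.
 */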
static void
process_account(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes, boolean_t iterated)
{
    size_t                 namelen;
    unsigned int           idx, count, nextinfo;
    vm_allocation_site_t * site;
    lck_spin_lock(&vm_allocation_sites_lock);

    for (idx = 0; idx <= vm_allocation_tag_highest; idx++)
    {
        site = vm_allocation_sites[idx];
        if (!site) continue;
        info[idx].mapped = site->mapped;
        info[idx].tag    = site->tag;
        if (!iterated)
        {
            info[idx].size = site->total;
#if DEBUG || DEVELOPMENT
            info[idx].peak = site->peak;
#endif /* DEBUG || DEVELOPMENT */
        }
        else
        {
            if (!site->subtotalscount && (site->total != info[idx].size))
            {
                printf("tag mismatch[%d] 0x%qx, iter 0x%qx\n", idx, site->total, info[idx].size);
                info[idx].size = site->total;
            }
        }
    }

    nextinfo = (vm_allocation_tag_highest + 1);
    count    = nextinfo;
    if (count >= num_info) count = num_info;

    for (idx = 0; idx < count; idx++)
    {
        site = vm_allocation_sites[idx];
        if (!site) continue;
        info[idx].flags |= VM_KERN_SITE_WIRED;
        if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
        {
            info[idx].site   = idx;
            info[idx].flags |= VM_KERN_SITE_TAG;
            if (VM_KERN_MEMORY_ZONE == idx)
            {
                info[idx].flags |= VM_KERN_SITE_HIDE;
                info[idx].flags &= ~VM_KERN_SITE_WIRED;
                info[idx].collectable_bytes = zones_collectable_bytes;
            }
        }
        else if ((namelen = (VM_TAG_NAME_LEN_MAX & (site->flags >> VM_TAG_NAME_LEN_SHIFT))))
        {
            info[idx].flags |= VM_KERN_SITE_NAMED;
            if (namelen > sizeof(info[idx].name)) namelen = sizeof(info[idx].name);
            strncpy(&info[idx].name[0], KA_NAME(site), namelen);
        }
        else if (VM_TAG_KMOD & site->flags)
        {
            info[idx].site   = OSKextGetKmodIDForSite(site, NULL, 0);
            info[idx].flags |= VM_KERN_SITE_KMOD;
        }
        else
        {
            info[idx].site   = VM_KERNEL_UNSLIDE(site);
            info[idx].flags |= VM_KERN_SITE_KERNEL;
        }
#if VM_MAX_TAG_ZONES
        vm_allocation_zone_total_t * zone;
        uint32_t                     zidx;
        vm_size_t                    elem_size;

        if (vm_allocation_zone_totals
            && (zone = vm_allocation_zone_totals[idx])
            && (nextinfo < num_info))
        {
            for (zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++)
            {
                if (!zone[zidx].peak) continue;
                info[nextinfo]        = info[idx];
                info[nextinfo].zone   = zone_index_from_tag_index(zidx, &elem_size);
                info[nextinfo].flags &= ~VM_KERN_SITE_WIRED;
                info[nextinfo].flags |= VM_KERN_SITE_ZONE;
                info[nextinfo].size   = zone[zidx].total;
                info[nextinfo].peak   = zone[zidx].peak;
                info[nextinfo].mapped = 0;
                if (zone[zidx].wastediv)
                {
                    info[nextinfo].collectable_bytes = ((zone[zidx].waste * zone[zidx].total / elem_size) / zone[zidx].wastediv);
                }
                nextinfo++;
            }
        }
#endif /* VM_MAX_TAG_ZONES */
        if (site->subtotalscount)
        {
            uint64_t mapped, mapcost, take;
            uint32_t sub;
            vm_tag_t alloctag;

            info[idx].size   = site->total;
            mapped           = info[idx].size;
            info[idx].mapped = mapped;
            mapcost          = 0;
            for (sub = 0; sub < site->subtotalscount; sub++)
            {
                alloctag = site->subtotals[sub].tag;
                assert(alloctag < num_info);
                if (info[alloctag].name[0]) continue;
                take = info[alloctag].mapped;
                if (take > info[alloctag].size) take = info[alloctag].size;
                if (take > mapped)              take = mapped;
                info[alloctag].mapped -= take;
                info[alloctag].size   -= take;
                mapped                -= take;
                mapcost               += take;
            }
            info[idx].size = mapcost;
        }
    }
    lck_spin_unlock(&vm_allocation_sites_lock);
}
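
/*
 * Upper bound on the number of mach_memory_info_t entries vm_page_diagnose()
 * can produce: one per allocation site, one per zone with a recorded peak
 * for that site, plus the fixed counters.
 */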
uint32_t
vm_page_diagnose_estimate(void)
{
    vm_allocation_site_t * site;
    uint32_t               count;
    uint32_t               idx;

    lck_spin_lock(&vm_allocation_sites_lock);
    for (count = idx = 0; idx < VM_MAX_TAG_VALUE; idx++)
    {
        site = vm_allocation_sites[idx];
        if (!site) continue;
        count++;
#if VM_MAX_TAG_ZONES
        if (vm_allocation_zone_totals)
        {
            vm_allocation_zone_total_t * zone;
            zone = vm_allocation_zone_totals[idx];
            if (!zone) continue;
            for (uint32_t zidx = 0; zidx < VM_MAX_TAG_ZONES; zidx++) if (zone[zidx].peak) count++;
        }
#endif /* VM_MAX_TAG_ZONES */
    }
    lck_spin_unlock(&vm_allocation_sites_lock);

    /* some slop for new tags created */
    count += VM_KERN_COUNTER_COUNT;

    return (count);
}
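
/*
 * Fill 'info' with a snapshot of kernel memory usage.  The fixed counters
 * (managed, wired, reserved, stolen, boot and the kernel/zone/kalloc map
 * footprints) are written at the tail of the array via SET_COUNT/SET_MAP;
 * per-tag entries come from the live site totals, or from an explicit walk
 * of wired objects and kernel_map when tags are not actively updated.
 */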
kern_return_t
vm_page_diagnose(mach_memory_info_t * info, unsigned int num_info, uint64_t zones_collectable_bytes)
{
    uint64_t             wired_size;
    uint64_t             wired_managed_size;
    uint64_t             wired_reserved_size;
    uint64_t             booter_size;
    boolean_t            iterate;
    mach_memory_info_t * counts;

    bzero(info, num_info * sizeof(mach_memory_info_t));

    if (!vm_page_wire_count_initial) return (KERN_ABORTED);

#if CONFIG_EMBEDDED
    wired_size          = ptoa_64(vm_page_wire_count);
    wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count);
#else
    wired_size          = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
    wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
#endif
    wired_managed_size  = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);

    booter_size = ml_get_booter_memory_size();
    wired_size += booter_size;

    assert(num_info >= VM_KERN_COUNTER_COUNT);
    num_info -= VM_KERN_COUNTER_COUNT;
    counts = &info[num_info];

#define SET_COUNT(xcount, xsize, xflags)                        \
    counts[xcount].tag    = VM_MAX_TAG_VALUE + xcount;          \
    counts[xcount].site   = (xcount);                           \
    counts[xcount].size   = (xsize);                            \
    counts[xcount].mapped = (xsize);                            \
    counts[xcount].flags  = VM_KERN_SITE_COUNTER | xflags;

    SET_COUNT(VM_KERN_COUNT_MANAGED,       ptoa_64(vm_page_pages),              0);
    SET_COUNT(VM_KERN_COUNT_WIRED,         wired_size,                          0);
    SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size,                  0);
    SET_COUNT(VM_KERN_COUNT_RESERVED,      wired_reserved_size,                 VM_KERN_SITE_WIRED);
    SET_COUNT(VM_KERN_COUNT_STOLEN,        ptoa_64(vm_page_stolen_count),       VM_KERN_SITE_WIRED);
    SET_COUNT(VM_KERN_COUNT_LOPAGE,        ptoa_64(vm_lopage_free_count),       VM_KERN_SITE_WIRED);
    SET_COUNT(VM_KERN_COUNT_WIRED_BOOT,    ptoa_64(vm_page_wire_count_on_boot), 0);
    SET_COUNT(VM_KERN_COUNT_BOOT_STOLEN,   booter_size,                         VM_KERN_SITE_WIRED);

#define SET_MAP(xcount, xsize, xfree, xlargest)         \
    counts[xcount].site    = (xcount);                  \
    counts[xcount].size    = (xsize);                   \
    counts[xcount].mapped  = (xsize);                   \
    counts[xcount].free    = (xfree);                   \
    counts[xcount].largest = (xlargest);                \
    counts[xcount].flags   = VM_KERN_SITE_COUNTER;

    vm_map_size_t map_size, map_free, map_largest;

    vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
    SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);

    vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
    SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);

    vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
    SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
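
    /*
     * With no active tag updates, derive per-tag sizes by walking wired
     * objects and then kernel_map (descending at most one submap level),
     * counting wired pages in kernel_object-backed entries and charging
     * them to the entry's alias tag.
     */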
    iterate = !VM_TAG_ACTIVE_UPDATE;
    if (iterate)
    {
        enum               { kMaxKernelDepth = 1 };
        vm_map_t           maps[kMaxKernelDepth];
        vm_map_entry_t     entries[kMaxKernelDepth];
        vm_map_t           map;
        vm_map_entry_t     entry;
        vm_object_offset_t offset;
        vm_page_t          page;
        int                stackIdx, count;

        vm_page_iterate_objects(info, num_info, &vm_page_count_object);

        map = kernel_map;
        stackIdx = 0;
        while (map)
        {
            vm_map_lock(map);
            for (entry = map->hdr.links.next; map; entry = entry->links.next)
            {
                if (entry->is_sub_map)
                {
                    assert(stackIdx < kMaxKernelDepth);
                    maps[stackIdx]    = map;
                    entries[stackIdx] = entry;
                    stackIdx++;
                    map = VME_SUBMAP(entry);
                    entry = NULL;
                    break;
                }
                if (VME_OBJECT(entry) == kernel_object)
                {
                    count = 0;
                    vm_object_lock(VME_OBJECT(entry));
                    for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
                    {
                        page = vm_page_lookup(VME_OBJECT(entry), offset);
                        if (page && VM_PAGE_WIRED(page)) count++;
                    }
                    vm_object_unlock(VME_OBJECT(entry));

                    if (count)
                    {
                        assert(VME_ALIAS(entry) != VM_KERN_MEMORY_NONE);
                        assert(VME_ALIAS(entry) < num_info);
                        info[VME_ALIAS(entry)].size += ptoa_64(count);
                    }
                }
                while (map && (entry == vm_map_last_entry(map)))
                {
                    vm_map_unlock(map);
                    if (!stackIdx) map = NULL;
                    else
                    {
                        --stackIdx;
                        map   = maps[stackIdx];
                        entry = entries[stackIdx];
                    }
                }
            }
        }
    }

    process_account(info, num_info, zones_collectable_bytes, iterate);

    return (KERN_SUCCESS);
}
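
/*
 * Debug / development helper: report the size and tag backing a kernel
 * address, consulting the zone allocator first and falling back to a
 * kernel_map (plus one submap level) entry lookup.
 */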
#if DEBUG || DEVELOPMENT

kern_return_t
vm_kern_allocation_info(uintptr_t addr, vm_size_t * size, vm_tag_t * tag, vm_size_t * zone_size)
{
    kern_return_t  ret;
    vm_size_t      zsize;
    vm_map_t       map;
    vm_map_entry_t entry;

    zsize = zone_element_info((void *) addr, tag);
    if (zsize)
    {
        *zone_size = *size = zsize;
        return (KERN_SUCCESS);
    }

    *zone_size = 0;
    ret = KERN_INVALID_ADDRESS;
    for (map = kernel_map; map; )
    {
        vm_map_lock(map);
        if (!vm_map_lookup_entry(map, addr, &entry)) break;
        if (entry->is_sub_map)
        {
            if (map != kernel_map) break;
            map = VME_SUBMAP(entry);
            continue;
        }
        if (entry->vme_start != addr) break;
        *tag = VME_ALIAS(entry);
        *size = (entry->vme_end - addr);
        ret = KERN_SUCCESS;
        break;
    }
    if (map != kernel_map) vm_map_unlock(map);
    vm_map_unlock(kernel_map);

    return (ret);
}

#endif /* DEBUG || DEVELOPMENT */
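
/*
 * Map a VM tag back to the kext that owns it: when the tag's allocation
 * site is kmod-tagged, return its kmod id, passing 'name'/'namelen'
 * through to OSKextGetKmodIDForSite().
 */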
uint32_t
vm_tag_get_kext(vm_tag_t tag, char * name, vm_size_t namelen)
{
    vm_allocation_site_t * site;
    uint32_t               kmodId;

    kmodId = 0;
    lck_spin_lock(&vm_allocation_sites_lock);
    if ((site = vm_allocation_sites[tag]))
    {
        if (VM_TAG_KMOD & site->flags)
        {
            kmodId = OSKextGetKmodIDForSite(site, name, namelen);
        }
    }
    lck_spin_unlock(&vm_allocation_sites_lock);