/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */
#include <libkern/OSAtomic.h>
#include <libkern/OSDebug.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>

#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/ledger.h>

#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>
#include <mach_debug/zone_info.h>
#include <pexpert/pexpert.h>

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_compressor.h>
#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif /* CONFIG_PHANTOM_CACHE */

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kdebug.h>
boolean_t	hibernate_cleaning_in_progress = FALSE;
boolean_t	vm_page_free_verify = TRUE;

uint32_t	vm_lopage_free_count = 0;
uint32_t	vm_lopage_free_limit = 0;
uint32_t	vm_lopage_lowater = 0;
boolean_t	vm_lopage_refill = FALSE;
boolean_t	vm_lopage_needed = FALSE;

lck_mtx_ext_t	vm_page_queue_lock_ext;
lck_mtx_ext_t	vm_page_queue_free_lock_ext;
lck_mtx_ext_t	vm_purgeable_queue_lock_ext;

int		speculative_age_index = 0;
int		speculative_steal_index = 0;
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];


__private_extern__ void		vm_page_init_lck_grp(void);

static void		vm_page_free_prepare(vm_page_t page);
static vm_page_t	vm_page_grab_fictitious_common(ppnum_t phys_addr);

static void vm_tag_init(void);

uint64_t	vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
/*
 *	Associated with page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */

vm_offset_t	virtual_space_start;
vm_offset_t	virtual_space_end;
uint32_t	vm_page_pages;
/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical,
 *	or VP, table.]
 */
typedef struct {
	vm_page_packed_t page_list;
#if	MACH_PAGE_HASH_STATS
	int		cur_count;		/* current count */
	int		hi_count;		/* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;


#define BUCKETS_PER_LOCK	16
vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
unsigned int	vm_page_hash_mask;		/* Mask for hash function */
unsigned int	vm_page_hash_shift;		/* Shift for hash function */
uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
unsigned int	vm_page_bucket_lock_count = 0;	/* How big is array of locks? */

lck_spin_t	*vm_page_bucket_locks;
lck_spin_t	vm_objects_wired_lock;
lck_spin_t	vm_allocation_sites_lock;

#if VM_PAGE_BUCKETS_CHECK
boolean_t vm_page_buckets_check_ready = FALSE;
#if VM_PAGE_FAKE_BUCKETS
vm_page_bucket_t *vm_page_fake_buckets;		/* decoy buckets */
vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */

extern int not_in_kdp;
#if	MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
	int	i;
	int	numbuckets = 0;
	int	highsum = 0;
	int	maxdepth = 0;

	for (i = 0; i < vm_page_bucket_count; i++) {
		if (vm_page_buckets[i].hi_count) {
			numbuckets++;
			highsum += vm_page_buckets[i].hi_count;
			if (vm_page_buckets[i].hi_count > maxdepth)
				maxdepth = vm_page_buckets[i].hi_count;
		}
	}
	printf("Total number of buckets: %d\n", vm_page_bucket_count);
	printf("Number used buckets: %d = %d%%\n",
		numbuckets, 100*numbuckets/vm_page_bucket_count);
	printf("Number unused buckets: %d = %d%%\n",
		vm_page_bucket_count - numbuckets,
		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
	printf("Sum of bucket max depth: %d\n", highsum);
	printf("Average bucket depth: %d.%2d\n",
		highsum/vm_page_bucket_count,
		highsum%vm_page_bucket_count);
	printf("Maximum bucket depth: %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	functions.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;
/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
struct vm_page	vm_page_template;

vm_page_t	vm_pages = VM_PAGE_NULL;
unsigned int	vm_pages_count = 0;
ppnum_t		vm_page_lowest = 0;
/*
 *	Resident pages that represent real memory
 *	are allocated from a set of free lists,
 *	one per color.
 */
unsigned int	vm_colors;
unsigned int	vm_color_mask;			/* mask is == (vm_colors-1) */
unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
unsigned int	vm_free_magazine_refill_limit = 0;
queue_head_t	vm_page_queue_free[MAX_COLORS];
unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_wanted_privileged;
unsigned int	vm_page_free_count;
unsigned int	vm_page_fictitious_count;
/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
lck_mtx_ext_t vm_page_alloc_lock_ext;

unsigned int	io_throttle_zero_fill;

unsigned int	vm_page_local_q_count = 0;
unsigned int	vm_page_local_q_soft_limit = 250;
unsigned int	vm_page_local_q_hard_limit = 500;
struct vplq	*vm_page_local_q = NULL;
/* N.B. Guard and fictitious pages must not
 * be assigned a zero phys_page value.
 */
/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *	Guard pages are not accessible so they don't
 *	need a physical address, but we need to enter
 *	one in the pmap.
 *	Let's make it recognizable and make sure that
 *	we don't use a real physical page with that
 *	physical address.
 */
ppnum_t vm_page_guard_addr = (ppnum_t) -2;
/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	file backed and anonymous for convenience as the
 *	pageout daemon often assigns a higher
 *	importance to anonymous pages (less likely to pick)
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
queue_head_t	vm_page_queue_anonymous;	/* inactive memory queue for anonymous pages */
queue_head_t	vm_page_queue_throttled;

queue_head_t	vm_objects_wired;

unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_anonymous_count;
unsigned int	vm_page_throttled_count;
unsigned int	vm_page_speculative_count;

unsigned int	vm_page_wire_count;
unsigned int	vm_page_stolen_count;
unsigned int	vm_page_wire_count_initial;
unsigned int	vm_page_pages_initial;
unsigned int	vm_page_gobble_count = 0;

#define	VM_PAGE_WIRE_COUNT_WARNING	0
#define VM_PAGE_GOBBLE_COUNT_WARNING	0
unsigned int	vm_page_purgeable_count = 0;		/* # of pages purgeable now */
unsigned int	vm_page_purgeable_wired_count = 0;	/* # of purgeable pages that are wired now */
uint64_t	vm_page_purged_count = 0;		/* total count of purged pages */

unsigned int	vm_page_xpmapped_external_count = 0;
unsigned int	vm_page_external_count = 0;
unsigned int	vm_page_internal_count = 0;
unsigned int	vm_page_pageable_external_count = 0;
unsigned int	vm_page_pageable_internal_count = 0;

#if DEVELOPMENT || DEBUG
unsigned int	vm_page_speculative_recreated = 0;
unsigned int	vm_page_speculative_created = 0;
unsigned int	vm_page_speculative_used = 0;
#endif /* DEVELOPMENT || DEBUG */

queue_head_t	vm_page_queue_cleaned;

unsigned int	vm_page_cleaned_count = 0;
unsigned int	vm_pageout_enqueued_cleaned = 0;

uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t		max_valid_low_ppnum = 0xffffffff;


/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_throttle_limit = 0;
unsigned int	vm_page_inactive_target = 0;
unsigned int	vm_page_anonymous_min = 0;
unsigned int	vm_page_inactive_min = 0;
unsigned int	vm_page_free_reserved = 0;
unsigned int	vm_page_throttle_count = 0;


/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;
/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
	page_size  = PAGE_SIZE;
	page_mask  = PAGE_MASK;
	page_shift = PAGE_SHIFT;

	if ((page_mask & page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");

	for (page_shift = 0; ; page_shift++)
		if ((1U << page_shift) == page_size)
			break;
}
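/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * the power-of-two test and shift derivation above, shown as a standalone
 * helper.  A size S is a power of two exactly when (S & (S - 1)) == 0, which
 * is what the (page_mask & page_size) check amounts to, and its shift is the
 * index of the single set bit.  The helper name is hypothetical and the block
 * is not compiled.
 */
#if 0
static int
example_page_shift(unsigned int size)
{
	int shift;

	if (size == 0 || (size & (size - 1)) != 0)
		return -1;			/* not a power of two */
	for (shift = 0; (1U << shift) != size; shift++)
		continue;
	return shift;				/* e.g. 4096 -> 12 */
}
#endif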
#define COLOR_GROUPS_TO_STEAL	4


/* Called once during startup, once the cache geometry is known.
 */
void
vm_page_set_colors( void )
{
	unsigned int	n, override;

	if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )	/* colors specified as a boot-arg? */
		n = override;
	else if ( vm_cache_geometry_colors )		/* do we know what the cache geometry is? */
		n = vm_cache_geometry_colors;
	else	n = DEFAULT_COLORS;			/* use default if all else fails */

	if ( n > MAX_COLORS )
		n = MAX_COLORS;

	/* the count must be a power of 2  */
	if ( ( n & (n - 1)) != 0  )
		panic("vm_page_set_colors");

	vm_colors = n;
	vm_color_mask = n - 1;

	vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
}
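/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * because the color count is forced to a power of two above, a page's
 * free-list color can be selected by masking its physical page number with
 * vm_color_mask instead of taking a modulus.  The helper below is a
 * hypothetical stand-in and is not compiled; it only shows the mask math.
 */
#if 0
static unsigned int
example_page_color(ppnum_t phys_page, unsigned int color_mask)
{
	/* with 32 colors, color_mask == 31 and pages 0, 32, 64, ... share color 0 */
	return (unsigned int)(phys_page & color_mask);
}
#endif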
lck_grp_t		vm_page_lck_grp_free;
lck_grp_t		vm_page_lck_grp_queue;
lck_grp_t		vm_page_lck_grp_local;
lck_grp_t		vm_page_lck_grp_purge;
lck_grp_t		vm_page_lck_grp_alloc;
lck_grp_t		vm_page_lck_grp_bucket;
lck_grp_attr_t		vm_page_lck_grp_attr;
lck_attr_t		vm_page_lck_attr;


__private_extern__ void
vm_page_init_lck_grp(void)
{
	/*
	 * initialize the vm_page lock world
	 */
	lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
	lck_attr_setdefault(&vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);

	vm_compressor_init_locks();
}
void
vm_page_init_local_q()
{
	unsigned int		num_cpus;
	unsigned int		i;
	struct vplq		*t_local_q;

	num_cpus = ml_get_max_cpus();

	/*
	 * no point in this for a uni-processor system
	 */
	if (num_cpus >= 2) {
		t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));

		for (i = 0; i < num_cpus; i++) {
			struct vpl	*lq;

			lq = &t_local_q[i].vpl_un.vpl;
			VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
			queue_init(&lq->vpl_queue);
			lq->vpl_internal_count = 0;
			lq->vpl_external_count = 0;
		}
		vm_page_local_q_count = num_cpus;

		vm_page_local_q = (struct vplq *)t_local_q;
	}
}
/*
 *	vm_page_bootstrap:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 *	Returns the range of available kernel virtual memory.
 */

void
vm_page_bootstrap(
	vm_offset_t		*startp,
	vm_offset_t		*endp)
{
	register vm_page_t	m;
	unsigned int		i;
	unsigned int		log1;
	unsigned int		log2;
	unsigned int		size;

	/*
	 *	Initialize the vm_page template.
	 */

	m = &vm_page_template;
	bzero(m, sizeof (*m));

	m->pageq.next = NULL;
	m->pageq.prev = NULL;
	m->listq.next = NULL;
	m->listq.prev = NULL;
	m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);

	m->object = VM_OBJECT_NULL;		/* reset later */
	m->offset = (vm_object_offset_t) -1;	/* reset later */

	m->pageout_queue = FALSE;
	m->speculative = FALSE;
	m->reference = FALSE;
	m->throttled = FALSE;
	m->__unused_pageq_bits = 0;

	m->phys_page = 0;			/* reset later */

	m->fictitious = FALSE;
	m->clustered = FALSE;
	m->overwriting = FALSE;
	m->encrypted = FALSE;
	m->encrypted_cleaning = FALSE;
	m->cs_validated = FALSE;
	m->cs_tainted = FALSE;
	m->compressor = FALSE;
	m->written_by_kernel = FALSE;
	m->__unused_object_bits = 0;
	/*
	 *	Initialize the page queues.
	 */
	vm_page_init_lck_grp();

	lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
		int group;

		purgeable_queues[i].token_q_head = 0;
		purgeable_queues[i].token_q_tail = 0;
		for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
			queue_init(&purgeable_queues[i].objq[group]);

		purgeable_queues[i].type = i;
		purgeable_queues[i].new_pages = 0;
		purgeable_queues[i].debug_count_tokens = 0;
		purgeable_queues[i].debug_count_objects = 0;
	}
	purgeable_nonvolatile_count = 0;
	queue_init(&purgeable_nonvolatile_queue);

	for (i = 0; i < MAX_COLORS; i++ )
		queue_init(&vm_page_queue_free[i]);

	queue_init(&vm_lopage_queue_free);
	queue_init(&vm_page_queue_active);
	queue_init(&vm_page_queue_inactive);
	queue_init(&vm_page_queue_cleaned);
	queue_init(&vm_page_queue_throttled);
	queue_init(&vm_page_queue_anonymous);
	queue_init(&vm_objects_wired);

	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
		queue_init(&vm_page_queue_speculative[i].age_q);

		vm_page_queue_speculative[i].age_ts.tv_sec = 0;
		vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
	}
	vm_page_free_wanted = 0;
	vm_page_free_wanted_privileged = 0;

	vm_page_set_colors();
	/*
	 *	Steal memory for the map and zone subsystems.
	 */
	kernel_debug_string_simple("zone_steal_memory");
	zone_steal_memory();
	kernel_debug_string_simple("vm_map_steal_memory");
	vm_map_steal_memory();

	/*
	 *	Allocate (and initialize) the virtual-to-physical
	 *	table hash buckets.
	 *
	 *	The number of buckets should be a power of two to
	 *	get a good hash function.  The following computation
	 *	chooses the first power of two that is greater
	 *	than the number of physical pages in the system.
	 */

	if (vm_page_bucket_count == 0) {
		unsigned int npages = pmap_free_pages();

		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < npages)
			vm_page_bucket_count <<= 1;
	}
	vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;

	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 *	Calculate object shift value for hashing algorithm:
	 *		O = log2(sizeof(struct vm_object))
	 *		B = log2(vm_page_bucket_count)
	 *		hash shifts the object left by
	 *		B/2 - O
	 */
	size = vm_page_bucket_count;
	for (log1 = 0; size > 1; log1++)
		size /= 2;
	size = sizeof(struct vm_object);
	for (log2 = 0; size > 1; log2++)
		size /= 2;
	vm_page_hash_shift = log1/2 - log2 + 1;

	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
	vm_page_bucket_hash |= 1;			/* Set bit and add 1 - always must be 1 to insure unique series */

	if (vm_page_hash_mask & vm_page_bucket_count)
		printf("vm_page_bootstrap: WARNING -- strange page hash\n");
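	/*
	 * Illustrative note (editor's addition, not in the original source):
	 * because vm_page_bucket_count is forced to a power of two above,
	 * vm_page_hash_mask (== count - 1) can stand in for a modulus.  For
	 * example, 1,000,000 physical pages yields 1,048,576 buckets and a
	 * mask of 0xFFFFF.  The "strange page hash" warning above fires only
	 * if the mask and the count share bits, i.e. the count somehow ended
	 * up not being a power of two.
	 */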
#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
	/*
	 * Allocate a decoy set of page buckets, to detect
	 * any stomping there.
	 */
	vm_page_fake_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));
	vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
	vm_page_fake_buckets_end =
		vm_map_round_page((vm_page_fake_buckets_start +
				   (vm_page_bucket_count *
				    sizeof (vm_page_bucket_t))),
				  PAGE_MASK);
	char *cp;
	for (cp = (char *)vm_page_fake_buckets_start;
	     cp < (char *)vm_page_fake_buckets_end;
	     cp++) {
		*cp = 0x5a;
	}
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */

	kernel_debug_string_simple("vm_page_buckets");
	vm_page_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));

	kernel_debug_string_simple("vm_page_bucket_locks");
	vm_page_bucket_locks = (lck_spin_t *)
		pmap_steal_memory(vm_page_bucket_lock_count *
				  sizeof(lck_spin_t));

	for (i = 0; i < vm_page_bucket_count; i++) {
		register vm_page_bucket_t *bucket = &vm_page_buckets[i];

		bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
#if	MACH_PAGE_HASH_STATS
		bucket->cur_count = 0;
		bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
	}

	for (i = 0; i < vm_page_bucket_lock_count; i++)
		lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);

	lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
	lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);

#if VM_PAGE_BUCKETS_CHECK
	vm_page_buckets_check_ready = TRUE;
#endif /* VM_PAGE_BUCKETS_CHECK */
	/*
	 *	Machine-dependent code allocates the resident page table.
	 *	It uses vm_page_init to initialize the page frames.
	 *	The code also returns to us the virtual space available
	 *	to the kernel.  We don't trust the pmap module
	 *	to get the alignment right.
	 */

	kernel_debug_string_simple("pmap_startup");
	pmap_startup(&virtual_space_start, &virtual_space_end);
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);

	*startp = virtual_space_start;
	*endp = virtual_space_end;

	/*
	 *	Compute the initial "wire" count.
	 *	Up until now, the pages which have been set aside are not under
	 *	the VM system's control, so although they aren't explicitly
	 *	wired, they nonetheless can't be moved. At this moment,
	 *	all VM managed pages are "free", courtesy of pmap_startup.
	 */
	assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
	vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;	/* initial value */
	vm_page_wire_count_initial = vm_page_wire_count;
	vm_page_pages_initial = vm_page_pages;

	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
	       vm_page_free_count, vm_page_wire_count);

	kernel_debug_string_simple("vm_page_bootstrap complete");
	simple_lock_init(&vm_paging_lock, 0);
}
#ifndef	MACHINE_PAGES
/*
 *	We implement pmap_steal_memory and pmap_startup with the help
 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
	vm_size_t size)
{
	vm_offset_t	addr, vaddr;
	ppnum_t		phys_page;

	/*
	 *	We round the size up to a multiple of the pointer size.
	 */

	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
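	/*
	 * Illustrative note (editor's addition, not in the original source):
	 * with 8-byte pointers the mask above rounds the size up to the next
	 * multiple of 8, e.g. 13 -> (13 + 7) & ~7 == 16, while a size that is
	 * already aligned (such as 16) is left unchanged.
	 */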
	/*
	 *	If this is the first call to pmap_steal_memory,
	 *	we have to initialize ourself.
	 */

	if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/*
		 *	The initial values must be aligned properly, and
		 *	we don't trust the pmap module to do it right.
		 */

		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);
	}

	/*
	 *	Allocate virtual memory for this request.
	 */

	addr = virtual_space_start;
	virtual_space_start += size;

	//kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */

	/*
	 *	Allocate and map physical pages to back new virtual pages.
	 */

	for (vaddr = round_page(addr);
	     vaddr < addr + size;
	     vaddr += PAGE_SIZE) {

		if (!pmap_next_page_hi(&phys_page))
			panic("pmap_steal_memory");

		/*
		 *	XXX Logically, these mappings should be wired,
		 *	but some pmap modules barf if they are.
		 */
#if defined(__LP64__)
		pmap_pre_expand(kernel_pmap, vaddr);
#endif

		pmap_enter(kernel_pmap, vaddr, phys_page,
			   VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
			   VM_WIMG_USE_DEFAULT, FALSE);
		/*
		 * Account for newly stolen memory
		 */
		vm_page_wire_count++;
		vm_page_stolen_count++;
	}

	return (void *) addr;
}
void vm_page_release_startup(vm_page_t mem);

void
pmap_startup(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	unsigned int	i, npages, pages_initialized, fill, fillval;
	ppnum_t		phys_page;
	addr64_t	tmpaddr;

#if defined(__LP64__)
	/*
	 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
	 */
	assert(sizeof(struct vm_page) == 64);

	/*
	 * make sure we are aligned on a 64 byte boundary
	 * for VM_PAGE_PACK_PTR (it clips off the low-order
	 * 6 bits of the pointer)
	 */
	if (virtual_space_start != virtual_space_end)
		virtual_space_start = round_page(virtual_space_start);
#endif

	/*
	 *	We calculate how many page frames we will have
	 *	and then allocate the page structures in one chunk.
	 */

	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
	tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);

	/*
	 *	Initialize the page frames.
	 */
	kernel_debug_string_simple("Initialize the page frames");
	for (i = 0, pages_initialized = 0; i < npages; i++) {
		if (!pmap_next_page(&phys_page))
			break;
		if (pages_initialized == 0 || phys_page < vm_page_lowest)
			vm_page_lowest = phys_page;

		vm_page_init(&vm_pages[i], phys_page, FALSE);
		vm_page_pages++;
		pages_initialized++;
	}
	vm_pages_count = pages_initialized;

#if defined(__LP64__)

	if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);

	if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
#endif
	kernel_debug_string_simple("page fill/release");
	/*
	 * Check if we want to initialize pages to a known value
	 */
	fill = 0;							/* Assume no fill */
	if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;	/* Set fill */
#if	DEBUG
	/* This slows down booting the DEBUG kernel, particularly on
	 * large memory systems, but is worthwhile in deterministically
	 * trapping uninitialized memory usage.
	 */
	if (fill == 0) {
		fill = 1;
		fillval = 0xDEB8F177;
	}
#endif
	if (fill)
		kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);

	// -debug code remove
	if (2 == vm_himemory_mode) {
		// free low -> high so high is preferred
		for (i = 1; i <= pages_initialized; i++) {
			if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
			vm_page_release_startup(&vm_pages[i - 1]);
		}
	}
	else
	// debug code remove-

	/*
	 * Release pages in reverse order so that physical pages
	 * initially get allocated in ascending addresses. This keeps
	 * the devices (which must address physical memory) happy if
	 * they require several consecutive pages.
	 */
	for (i = pages_initialized; i > 0; i--) {
		if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
		vm_page_release_startup(&vm_pages[i - 1]);
	}

	VM_CHECK_MEMORYSTATUS;
#if 0
	{
	vm_page_t xx, xxo, xxl;
	int i, j, k, l;

	j = 0;						/* (BRINGUP) */
	xxl = 0;

	for( i = 0; i < vm_colors; i++ ) {
		queue_iterate(&vm_page_queue_free[i],
			      xx,
			      vm_page_t,
			      pageq) {			/* BRINGUP */
			j++;				/* (BRINGUP) */
			if(j > vm_page_free_count) {	/* (BRINGUP) */
				panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
			}

			l = vm_page_free_count - j;	/* (BRINGUP) */
			k = 0;				/* (BRINGUP) */

			if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

			for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
				k++;
				if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
				if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
					panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
				}
			}

			xxl = xx;
		}
	}

	if(j != vm_page_free_count) {			/* (BRINGUP) */
		panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
	}
	}
#endif

	/*
	 *	We have to re-align virtual_space_start,
	 *	because pmap_steal_memory has been using it.
	 */

	virtual_space_start = round_page(virtual_space_start);

	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
#endif	/* MACHINE_PAGES */
/*
 *	Routine:	vm_page_module_init
 *	Purpose:
 *		Second initialization pass, to be done after
 *		the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
	uint64_t vm_page_zone_pages, vm_page_zone_data_size;
	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
			     0, PAGE_SIZE, "vm pages");

#if	ZONE_DEBUG
	zone_debug_disable(vm_page_zone);
#endif	/* ZONE_DEBUG */

	zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
	zone_change(vm_page_zone, Z_EXPAND, FALSE);
	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
	zone_change(vm_page_zone, Z_FOREIGN, TRUE);
	zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
	/*
	 * Adjust zone statistics to account for the real pages allocated
	 * in vm_page_create(). [Q: is this really what we want?]
	 */
	vm_page_zone->count += vm_page_pages;
	vm_page_zone->sum_count += vm_page_pages;
	vm_page_zone_data_size = vm_page_pages * vm_page_zone->elem_size;
	vm_page_zone->cur_size += vm_page_zone_data_size;
	vm_page_zone_pages = ((round_page(vm_page_zone_data_size)) / PAGE_SIZE);
	OSAddAtomic64(vm_page_zone_pages, &(vm_page_zone->page_count));
	/* since zone accounts for these, take them out of stolen */
	VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
}
/*
 *	Routine:	vm_page_create
 *	Purpose:
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory.  For example,
 *		memory that it was reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
 */
void
vm_page_create(
	ppnum_t start,
	ppnum_t end)
{
	ppnum_t		phys_page;
	vm_page_t	m;

	for (phys_page = start;
	     phys_page < end;
	     phys_page++) {
		while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
			== VM_PAGE_NULL)
			vm_page_more_fictitious();

		m->fictitious = FALSE;
		pmap_clear_noencrypt(phys_page);

		vm_page_pages++;
		vm_page_release(m);
	}
}
/*
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:	The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	 & vm_page_hash_mask)
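/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * a standalone rendition of the bucket selection above.  Because
 * vm_page_hash_mask is (bucket count - 1) and the bucket count is a power of
 * two, the final mask is equivalent to a modulus, and the same index divided
 * by BUCKETS_PER_LOCK picks the spin lock covering that bucket.  The helper
 * name and the 4 KB page assumption are stand-ins; the block is not compiled.
 */
#if 0
static unsigned int
example_bucket_index(uintptr_t object, uint64_t offset,
		     uint32_t bucket_hash, unsigned int hash_mask)
{
	unsigned int hash;

	/* (offset >> 12) plays the role of atop_64(), assuming 4 KB pages */
	hash = (unsigned int)((object * bucket_hash) +
	       (((uint32_t)(offset >> 12)) ^ bucket_hash));
	/* the covering lock would be bucket_locks[(hash & hash_mask) / BUCKETS_PER_LOCK] */
	return hash & hash_mask;		/* bucket index */
}
#endif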
/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object must be locked.
 */
void
vm_page_insert(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
}

void
vm_page_insert_wired(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_tag_t		tag)
{
	vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
}
void
vm_page_insert_internal(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_tag_t		tag,
	boolean_t		queues_lock_held,
	boolean_t		insert_in_hash,
	boolean_t		batch_pmap_op,
	boolean_t		batch_accounting,
	uint64_t		*delayed_ledger_update)
{
	vm_page_bucket_t	*bucket;
	lck_spin_t		*bucket_lock;
	int			hash_id;
	task_t			owner;

	XPR(XPR_VM_PAGE,
	    "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
	    object, offset, mem, 0,0);
#if 0
	/*
	 * we may not hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif

	assert(page_aligned(offset));

	assert(!VM_PAGE_WIRED(mem) || mem->private || mem->fictitious || (tag != VM_KERN_MEMORY_NONE));

	/* the vm_submap_object is only a placeholder for submaps */
	assert(object != vm_submap_object);

	vm_object_lock_assert_exclusive(object);
	lck_mtx_assert(&vm_page_queue_lock,
		       queues_lock_held ? LCK_MTX_ASSERT_OWNED
					: LCK_MTX_ASSERT_NOTOWNED);
	if (insert_in_hash == TRUE) {
#if DEBUG || VM_PAGE_CHECK_BUCKETS
		if (mem->tabled || mem->object != VM_OBJECT_NULL)
			panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
			      "already in (obj=%p,off=0x%llx)",
			      mem, object, offset, mem->object, mem->offset);
#endif
		assert(!object->internal || offset < object->vo_size);

		/* only insert "pageout" pages into "pageout" objects,
		 * and normal pages into normal objects */

#if 0
		/*
		 * For some reason, this assertion gets tripped
		 * but it's mostly harmless, so let's disable it
		 * for now.
		 */
		assert(object->pageout == mem->pageout);
#endif

		assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);

		/*
		 *	Record the object/offset pair in this page
		 */

		mem->object = object;
		mem->offset = offset;

		/*
		 *	Insert it into the object_object/offset hash table
		 */
		hash_id = vm_page_hash(object, offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		mem->next_m = bucket->page_list;
		bucket->page_list = VM_PAGE_PACK_PTR(mem);
		assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));

#if	MACH_PAGE_HASH_STATS
		if (++bucket->cur_count > bucket->hi_count)
			bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */
		mem->hashed = TRUE;
		lck_spin_unlock(bucket_lock);
	}
	{
	unsigned int	cache_attr;

	cache_attr = object->wimg_bits & VM_WIMG_MASK;

	if (cache_attr != VM_WIMG_USE_DEFAULT) {
		PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
	}
	}
	/*
	 *	Now link into the object's list of backed pages.
	 */
	queue_enter(&object->memq, mem, vm_page_t, listq);
	object->memq_hint = mem;

	/*
	 *	Show that the object has one more resident page.
	 */

	object->resident_page_count++;
	if (VM_PAGE_WIRED(mem)) {
		if (!mem->private && !mem->fictitious) {
			if (!object->wired_page_count) {
				assert(VM_KERN_MEMORY_NONE != tag);
				object->wire_tag = tag;
				VM_OBJECT_WIRED(object);
			}
		}
		object->wired_page_count++;
	}
	assert(object->resident_page_count >= object->wired_page_count);

	if (batch_accounting == FALSE) {
		if (object->internal) {
			OSAddAtomic(1, &vm_page_internal_count);
		} else {
			OSAddAtomic(1, &vm_page_external_count);
		}
	}
	/*
	 * It wouldn't make sense to insert a "reusable" page in
	 * an object (the page would have been marked "reusable" only
	 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
	 * in the object at that time).
	 * But a page could be inserted in a "all_reusable" object, if
	 * something faults it in (a vm_read() from another task or a
	 * "use-after-free" issue in user space, for example). It can
	 * also happen if we're relocating a page from that object to
	 * a different physical page during a physically-contiguous
	 * allocation.
	 */
	assert(!mem->reusable);
	if (mem->object->all_reusable) {
		OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
	}

	if (object->purgable == VM_PURGABLE_DENY) {
		owner = TASK_NULL;
	} else {
		owner = object->vo_purgeable_owner;
	}
	if (owner &&
	    (object->purgable == VM_PURGABLE_NONVOLATILE ||
	     VM_PAGE_WIRED(mem))) {

		if (delayed_ledger_update)
			*delayed_ledger_update += PAGE_SIZE;
		else {
			/* more non-volatile bytes */
			ledger_credit(owner->ledger,
				      task_ledgers.purgeable_nonvolatile,
				      PAGE_SIZE);
			/* more footprint */
			ledger_credit(owner->ledger,
				      task_ledgers.phys_footprint,
				      PAGE_SIZE);
		}

	} else if (owner &&
		   (object->purgable == VM_PURGABLE_VOLATILE ||
		    object->purgable == VM_PURGABLE_EMPTY)) {
		assert(! VM_PAGE_WIRED(mem));
		/* more volatile bytes */
		ledger_credit(owner->ledger,
			      task_ledgers.purgeable_volatile,
			      PAGE_SIZE);
	}

	if (object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			OSAddAtomic(+1, &vm_page_purgeable_wired_count);
		} else {
			OSAddAtomic(+1, &vm_page_purgeable_count);
		}
	} else if (object->purgable == VM_PURGABLE_EMPTY &&
		   mem->throttled) {
		/*
		 * This page belongs to a purged VM object but hasn't
		 * been purged (because it was "busy").
		 * It's in the "throttled" queue and hence not
		 * visible to vm_pageout_scan().  Move it to a pageable
		 * queue, so that it can eventually be reclaimed, instead
		 * of lingering in the "empty" object.
		 */
		if (queues_lock_held == FALSE)
			vm_page_lockspin_queues();
		vm_page_deactivate(mem);
		if (queues_lock_held == FALSE)
			vm_page_unlock_queues();
	}

#if VM_OBJECT_TRACKING_OP_MODIFIED
	if (vm_object_tracking_inited &&
	    object->resident_page_count == 0 &&
	    object->pager == NULL &&
	    object->shadow != NULL &&
	    object->shadow->copy == object) {
		void	*bt[VM_OBJECT_TRACKING_BTDEPTH];
		int	numsaved = 0;

		numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
		btlog_add_entry(vm_object_tracking_btlog,
				object,
				VM_OBJECT_TRACKING_OP_MODIFIED,
				bt,
				numsaved);
	}
#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
}
/*
 *	vm_page_replace:
 *
 *	Exactly like vm_page_insert, except that we first
 *	remove any existing page at the given offset in object.
 *
 *	The object must be locked.
 */
void
vm_page_replace(
	register vm_page_t		mem,
	register vm_object_t		object,
	register vm_object_offset_t	offset)
{
	vm_page_bucket_t	*bucket;
	vm_page_t		found_m = VM_PAGE_NULL;
	lck_spin_t		*bucket_lock;
	int			hash_id;

#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	vm_object_lock_assert_exclusive(object);
#if DEBUG || VM_PAGE_CHECK_BUCKETS
	if (mem->tabled || mem->object != VM_OBJECT_NULL)
		panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
		      "already in (obj=%p,off=0x%llx)",
		      mem, object, offset, mem->object, mem->offset);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
#endif
	/*
	 *	Record the object/offset pair in this page
	 */

	mem->object = object;
	mem->offset = offset;

	/*
	 *	Insert it into the object_object/offset hash table,
	 *	replacing any page that might have been there.
	 */

	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	if (bucket->page_list) {
		vm_page_packed_t *mp = &bucket->page_list;
		vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);

		do {
			if (m->object == object && m->offset == offset) {
				/*
				 * Remove old page from hash list
				 */
				*mp = m->next_m;
				m->hashed = FALSE;

				found_m = m;
				break;
			}
			mp = &m->next_m;
		} while ((m = VM_PAGE_UNPACK_PTR(*mp)));

		mem->next_m = bucket->page_list;
	} else {
		mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
	}
	/*
	 * insert new page at head of hash list
	 */
	bucket->page_list = VM_PAGE_PACK_PTR(mem);
	mem->hashed = TRUE;

	lck_spin_unlock(bucket_lock);

	if (found_m) {
		/*
		 * there was already a page at the specified
		 * offset for this object... remove it from
		 * the object and free it back to the free list
		 */
		vm_page_free_unlocked(found_m, FALSE);
	}
	vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
}
/*
 *	vm_page_remove:		[ internal use only ]
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object must be locked.
 */
void
vm_page_remove(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_bucket_t *bucket;
	vm_page_t	this;
	lck_spin_t	*bucket_lock;
	int		hash_id;
	task_t		owner;

	XPR(XPR_VM_PAGE,
	    "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
	    mem->object, mem->offset,
	    mem, 0,0);

	vm_object_lock_assert_exclusive(mem->object);
	assert(mem->tabled);
	assert(!mem->cleaning);
	assert(!mem->laundry);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	if (remove_from_hash == TRUE) {
		/*
		 *	Remove from the object_object/offset hash table
		 */
		hash_id = vm_page_hash(mem->object, mem->offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
			/* optimize for common case */

			bucket->page_list = mem->next_m;
		} else {
			vm_page_packed_t	*prev;

			for (prev = &this->next_m;
			     (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
			     prev = &this->next_m)
				continue;
			*prev = this->next_m;
		}
#if	MACH_PAGE_HASH_STATS
		bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */
		mem->hashed = FALSE;
		lck_spin_unlock(bucket_lock);
	}
	/*
	 *	Now remove from the object's list of backed pages.
	 */

	vm_page_remove_internal(mem);

	/*
	 *	And show that the object has one fewer resident
	 *	page.
	 */

	assert(mem->object->resident_page_count > 0);
	mem->object->resident_page_count--;

	if (mem->object->internal) {
		assert(vm_page_internal_count);
		OSAddAtomic(-1, &vm_page_internal_count);
	} else {
		assert(vm_page_external_count);
		OSAddAtomic(-1, &vm_page_external_count);

		if (mem->xpmapped) {
			assert(vm_page_xpmapped_external_count);
			OSAddAtomic(-1, &vm_page_xpmapped_external_count);
		}
	}
	if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
		if (mem->object->resident_page_count == 0)
			vm_object_cache_remove(mem->object);
	}

	if (VM_PAGE_WIRED(mem)) {
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
		if (!mem->object->wired_page_count) {
			VM_OBJECT_UNWIRED(mem->object);
		}
	}
	assert(mem->object->resident_page_count >=
	       mem->object->wired_page_count);
	if (mem->reusable) {
		assert(mem->object->reusable_page_count > 0);
		mem->object->reusable_page_count--;
		assert(mem->object->reusable_page_count <=
		       mem->object->resident_page_count);
		mem->reusable = FALSE;
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	} else if (mem->object->all_reusable) {
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	}

	if (mem->object->purgable == VM_PURGABLE_DENY) {
		owner = TASK_NULL;
	} else {
		owner = mem->object->vo_purgeable_owner;
	}
	if (owner &&
	    (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
	     VM_PAGE_WIRED(mem))) {
		/* less non-volatile bytes */
		ledger_debit(owner->ledger,
			     task_ledgers.purgeable_nonvolatile,
			     PAGE_SIZE);
		/* less footprint */
		ledger_debit(owner->ledger,
			     task_ledgers.phys_footprint,
			     PAGE_SIZE);
	} else if (owner &&
		   (mem->object->purgable == VM_PURGABLE_VOLATILE ||
		    mem->object->purgable == VM_PURGABLE_EMPTY)) {
		assert(! VM_PAGE_WIRED(mem));
		/* less volatile bytes */
		ledger_debit(owner->ledger,
			     task_ledgers.purgeable_volatile,
			     PAGE_SIZE);
	}
	if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		} else {
			assert(vm_page_purgeable_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_count);
		}
	}
	if (mem->object->set_cache_attr == TRUE)
		pmap_set_cache_attributes(mem->phys_page, 0);

	mem->tabled = FALSE;
	mem->object = VM_OBJECT_NULL;
	mem->offset = (vm_object_offset_t) -1;
}
/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

#define	VM_PAGE_HASH_LOOKUP_THRESHOLD	10

#if DEBUG_VM_PAGE_LOOKUP

struct {
	uint64_t	vpl_total;
	uint64_t	vpl_empty_obj;
	uint64_t	vpl_bucket_NULL;
	uint64_t	vpl_hit_hint;
	uint64_t	vpl_hit_hint_next;
	uint64_t	vpl_hit_hint_prev;
	uint64_t	vpl_fast;
	uint64_t	vpl_slow;
	uint64_t	vpl_hit;
	uint64_t	vpl_miss;

	uint64_t	vpl_fast_elapsed;
	uint64_t	vpl_slow_elapsed;
} vm_page_lookup_stats __attribute__((aligned(8)));

#endif

#define KDP_VM_PAGE_WALK_MAX	1000
vm_page_t
kdp_vm_page_lookup(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	cur_page;
	int		num_traversed = 0;

	if (not_in_kdp) {
		panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
	}

	queue_iterate(&object->memq, cur_page, vm_page_t, listq) {
		if (cur_page->offset == offset) {
			return cur_page;
		}
		num_traversed++;

		if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
			return VM_PAGE_NULL;
		}
	}

	return VM_PAGE_NULL;
}
vm_page_t
vm_page_lookup(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	mem;
	vm_page_bucket_t *bucket;
	queue_entry_t	qe;
	lck_spin_t	*bucket_lock = NULL;
	int		hash_id;
#if DEBUG_VM_PAGE_LOOKUP
	uint64_t	start, elapsed;

	OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
#endif
	vm_object_lock_assert_held(object);

	if (object->resident_page_count == 0) {
#if DEBUG_VM_PAGE_LOOKUP
		OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
#endif
		return (VM_PAGE_NULL);
	}

	mem = object->memq_hint;

	if (mem != VM_PAGE_NULL) {
		assert(mem->object == object);

		if (mem->offset == offset) {
#if DEBUG_VM_PAGE_LOOKUP
			OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
#endif
			return (mem);
		}
		qe = queue_next(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t	next_page;

			next_page = (vm_page_t) qe;
			assert(next_page->object == object);

			if (next_page->offset == offset) {
				object->memq_hint = next_page; /* new hint */
#if DEBUG_VM_PAGE_LOOKUP
				OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
#endif
				return (next_page);
			}
		}
		qe = queue_prev(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t	prev_page;

			prev_page = (vm_page_t) qe;
			assert(prev_page->object == object);

			if (prev_page->offset == offset) {
				object->memq_hint = prev_page; /* new hint */
#if DEBUG_VM_PAGE_LOOKUP
				OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
#endif
				return (prev_page);
			}
		}
	}
	/*
	 * Search the hash table for this object/offset pair
	 */
	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];

	/*
	 * since we hold the object lock, we are guaranteed that no
	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
	 */
	if (!bucket->page_list) {
#if DEBUG_VM_PAGE_LOOKUP
		OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
#endif
		return (VM_PAGE_NULL);
	}

#if DEBUG_VM_PAGE_LOOKUP
	start = mach_absolute_time();
#endif
	if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
		/*
		 * on average, it's roughly 3 times faster to run a short memq list
		 * than to take the spin lock and go through the hash list
		 */
		mem = (vm_page_t)queue_first(&object->memq);

		while (!queue_end(&object->memq, (queue_entry_t)mem)) {

			if (mem->offset == offset)
				break;

			mem = (vm_page_t)queue_next(&mem->listq);
		}
		if (queue_end(&object->memq, (queue_entry_t)mem))
			mem = NULL;
	} else {
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
#if 0
			/*
			 * we don't hold the page queue lock
			 * so this check isn't safe to make
			 */
			VM_PAGE_CHECK(mem);
#endif
			if ((mem->object == object) && (mem->offset == offset))
				break;
		}
		lck_spin_unlock(bucket_lock);
	}

#if DEBUG_VM_PAGE_LOOKUP
	elapsed = mach_absolute_time() - start;

	if (bucket_lock) {
		OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
		OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
	} else {
		OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
		OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
	}
	if (mem != VM_PAGE_NULL)
		OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
	else
		OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
#endif
	if (mem != VM_PAGE_NULL) {
		assert(mem->object == object);

		object->memq_hint = mem;
	}
	return (mem);
}
/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(
	register vm_page_t		mem,
	register vm_object_t		new_object,
	vm_object_offset_t		new_offset,
	boolean_t			encrypted_ok)
{
	boolean_t	internal_to_external, external_to_internal;
	vm_tag_t	tag;

	assert(mem->object != new_object);
	assert(mem->object);

	/*
	 * The encryption key is based on the page's memory object
	 * (aka "pager") and paging offset.  Moving the page to
	 * another VM object changes its "pager" and "paging_offset"
	 * so it has to be decrypted first, or we would lose the key.
	 *
	 * One exception is VM object collapsing, where we transfer pages
	 * from one backing object to its parent object.  This operation also
	 * transfers the paging information, so the <pager,paging_offset> info
	 * should remain consistent.  The caller (vm_object_do_collapse())
	 * sets "encrypted_ok" in this case.
	 */
	if (!encrypted_ok && mem->encrypted) {
		panic("vm_page_rename: page %p is encrypted\n", mem);
	}

	XPR(XPR_VM_PAGE,
	    "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
	    new_object, new_offset,
	    mem, 0,0);

	/*
	 *	Changes to mem->object require the page lock because
	 *	the pageout daemon uses that lock to get the object.
	 */
	vm_page_lockspin_queues();

	internal_to_external = FALSE;
	external_to_internal = FALSE;

	/*
	 * it's much easier to get the vm_page_pageable_xxx accounting correct
	 * if we first move the page to the active queue... it's going to end
	 * up there anyway, and we don't do vm_page_rename's frequently enough
	 * for this to matter.
	 */
	vm_page_queues_remove(mem);
	vm_page_activate(mem);

	if (mem->active || mem->inactive || mem->speculative) {
		if (mem->object->internal && !new_object->internal) {
			internal_to_external = TRUE;
		}
		if (!mem->object->internal && new_object->internal) {
			external_to_internal = TRUE;
		}
	}

	tag = mem->object->wire_tag;
	vm_page_remove(mem, TRUE);
	vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);

	if (internal_to_external) {
		vm_page_pageable_internal_count--;
		vm_page_pageable_external_count++;
	} else if (external_to_internal) {
		vm_page_pageable_external_count--;
		vm_page_pageable_internal_count++;
	}

	vm_page_unlock_queues();
}
1914 * This takes a structure with random values and initializes it
1915 * so that it can be given to vm_page_release or vm_page_insert.
1926 if ((phys_page
!= vm_page_fictitious_addr
) && (phys_page
!= vm_page_guard_addr
)) {
1927 if (!(pmap_valid_page(phys_page
))) {
1928 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page
);
1932 *mem
= vm_page_template
;
1933 mem
->phys_page
= phys_page
;
1936 * we're leaving this turned off for now... currently pages
1937 * come off the free list and are either immediately dirtied/referenced
1938 * due to zero-fill or COW faults, or are used to read or write files...
1939 * in the file I/O case, the UPL mechanism takes care of clearing
1940 * the state of the HW ref/mod bits in a somewhat fragile way.
1941 * Since we may change the way this works in the future (to toughen it up),
1942 * I'm leaving this as a reminder of where these bits could get cleared
1946 * make sure both the h/w referenced and modified bits are
1947 * clear at this point... we are especially dependent on
1948 * not finding a 'stale' h/w modified in a number of spots
1949 * once this page goes back into use
1951 pmap_clear_refmod(phys_page
, VM_MEM_MODIFIED
| VM_MEM_REFERENCED
);
1953 mem
->lopage
= lopage
;
/*
 *	vm_page_grab_fictitious:
 *
 *	Remove a fictitious page from the free list.
 *	Returns VM_PAGE_NULL if there are no free pages.
 */
int	c_vm_page_grab_fictitious = 0;
int	c_vm_page_grab_fictitious_failed = 0;
int	c_vm_page_release_fictitious = 0;
int	c_vm_page_more_fictitious = 0;

vm_page_t
vm_page_grab_fictitious_common(
	ppnum_t phys_addr)
{
	vm_page_t	m;

	if ((m = (vm_page_t)zget(vm_page_zone))) {

		vm_page_init(m, phys_addr, FALSE);
		m->fictitious = TRUE;

		c_vm_page_grab_fictitious++;
	} else
		c_vm_page_grab_fictitious_failed++;

	return m;
}

vm_page_t
vm_page_grab_fictitious(void)
{
	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
}

vm_page_t
vm_page_grab_guard(void)
{
	return vm_page_grab_fictitious_common(vm_page_guard_addr);
}


/*
 *	vm_page_release_fictitious:
 *
 *	Release a fictitious page to the zone pool
 */
void
vm_page_release_fictitious(
	vm_page_t m)
{
	assert(m->fictitious);
	assert(m->phys_page == vm_page_fictitious_addr ||
	       m->phys_page == vm_page_guard_addr);

	c_vm_page_release_fictitious++;

	zfree(vm_page_zone, m);
}
/*
 *	vm_page_more_fictitious:
 *
 *	Add more fictitious pages to the zone.
 *	Allowed to block. This routine is way intimate
 *	with the zones code, for several reasons:
 *	1. we need to carve some page structures out of physical
 *	   memory before zones work, so they _cannot_ come from
 *	   the zone_map.
 *	2. the zone needs to be collectable in order to prevent
 *	   growth without bound. These structures are used by
 *	   the device pager (by the hundreds and thousands), as
 *	   private pages for pageout, and as blocking pages for
 *	   pagein. Temporary bursts in demand should not result in
 *	   permanent allocation of a resource.
 *	3. To smooth allocation humps, we allocate single pages
 *	   with kernel_memory_allocate(), and cram them into the
 *	   zone.
 */

void vm_page_more_fictitious(void)
{
	vm_offset_t	addr;
	kern_return_t	retval;

	c_vm_page_more_fictitious++;

	/*
	 * Allocate a single page from the zone_map. Do not wait if no physical
	 * pages are immediately available, and do not zero the space. We need
	 * our own blocking lock here to prevent having multiple,
	 * simultaneous requests from piling up on the zone_map lock. Exactly
	 * one (of our) threads should be potentially waiting on the map lock.
	 * If winner is not vm-privileged, then the page allocation will fail,
	 * and it will temporarily block here in the vm_page_wait().
	 */
	lck_mtx_lock(&vm_page_alloc_lock);
	/*
	 * If another thread allocated space, just bail out now.
	 */
	if (zone_free_count(vm_page_zone) > 5) {
		/*
		 * The number "5" is a small number that is larger than the
		 * number of fictitious pages that any single caller will
		 * attempt to allocate. Otherwise, a thread will attempt to
		 * acquire a fictitious page (vm_page_grab_fictitious), fail,
		 * release all of the resources and locks already acquired,
		 * and then call this routine. This routine finds the pages
		 * that the caller released, so fails to allocate new space.
		 * The process repeats infinitely. The largest known number
		 * of fictitious pages required in this manner is 2. 5 is
		 * simply a somewhat larger number.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		return;
	}

	retval = kernel_memory_allocate(zone_map,
					&addr, PAGE_SIZE, VM_PROT_ALL,
					KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
	if (retval != KERN_SUCCESS) {
		/*
		 * No page was available. Drop the
		 * lock to give another thread a chance at it, and
		 * wait for the pageout daemon to make progress.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		vm_page_wait(THREAD_UNINT);
		return;
	}

	zcram(vm_page_zone, addr, PAGE_SIZE);

	lck_mtx_unlock(&vm_page_alloc_lock);
}
/*
 * Return true if it is not likely that a non-vm_privileged thread
 * can get memory without blocking.  Advisory only, since the
 * situation may change under us.
 */
int
vm_pool_low(void)
{
	/* No locking, at worst we will fib. */
	return( vm_page_free_count <= vm_page_free_reserved );
}
/*
 * this is an interface to support bring-up of drivers
 * on platforms with physical memory > 4G...
 */
int		vm_himemory_mode = 2;


/*
 * this interface exists to support hardware controllers
 * incapable of generating DMAs with more than 32 bits
 * of address on platforms with physical memory > 4G...
 */

unsigned int	vm_lopages_allocated_q = 0;
unsigned int	vm_lopages_allocated_cpm_success = 0;
unsigned int	vm_lopages_allocated_cpm_failed = 0;
queue_head_t	vm_lopage_queue_free;

vm_page_t
vm_page_grablo(void)
{
	vm_page_t	mem;

	if (vm_lopage_needed == FALSE)
		return (vm_page_grab());

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if ( !queue_empty(&vm_lopage_queue_free)) {
		queue_remove_first(&vm_lopage_queue_free,
				   mem,
				   vm_page_t,
				   pageq);
		assert(vm_lopage_free_count);

		vm_lopage_free_count--;
		vm_lopages_allocated_q++;

		if (vm_lopage_free_count < vm_lopage_lowater)
			vm_lopage_refill = TRUE;

		lck_mtx_unlock(&vm_page_queue_free_lock);
	} else {
		lck_mtx_unlock(&vm_page_queue_free_lock);

		if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {

			lck_mtx_lock_spin(&vm_page_queue_free_lock);
			vm_lopages_allocated_cpm_failed++;
			lck_mtx_unlock(&vm_page_queue_free_lock);

			return (VM_PAGE_NULL);
		}
		vm_page_lockspin_queues();

		mem->gobbled = FALSE;
		vm_page_gobble_count--;
		vm_page_wire_count--;

		vm_lopages_allocated_cpm_success++;
		vm_page_unlock_queues();
	}
	assert(!mem->pmapped);
	assert(!mem->wpmapped);
	assert(!pmap_is_noencrypt(mem->phys_page));

	mem->pageq.next = NULL;
	mem->pageq.prev = NULL;

	return (mem);
}
/*
 * first try to grab a page from the per-cpu free list...
 * this must be done while pre-emption is disabled... if
 * a page is available, we're done...
 * if no page is available, grab the vm_page_queue_free_lock
 * and see if current number of free pages would allow us
 * to grab at least 1... if not, return VM_PAGE_NULL as before...
 * if there are pages available, disable preemption and
 * recheck the state of the per-cpu free list... we could
 * have been preempted and moved to a different cpu, or
 * some other thread could have re-filled it... if still
 * empty, figure out how many pages we can steal from the
 * global free queue and move to the per-cpu queue...
 * return 1 of these pages when done... only wakeup the
 * pageout_scan thread if we moved pages from the global
 * list... no need for the wakeup if we've satisfied the
 * request from the per-cpu queue.
 */
vm_page_t
vm_page_grab( void )
{
    vm_page_t   mem;

    disable_preemption();

    if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
return_page_from_cpu_list:
        PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
        PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;

        enable_preemption();
        mem->pageq.next = NULL;

        assert(mem->listq.next == NULL && mem->listq.prev == NULL);
        assert(mem->tabled == FALSE);
        assert(mem->object == VM_OBJECT_NULL);
        assert(!mem->laundry);
        assert(pmap_verify_free(mem->phys_page));
        assert(!mem->encrypted);
        assert(!mem->pmapped);
        assert(!mem->wpmapped);
        assert(!mem->active);
        assert(!mem->inactive);
        assert(!mem->throttled);
        assert(!mem->speculative);
        assert(!pmap_is_noencrypt(mem->phys_page));

        return mem;
    }
    enable_preemption();

    /*
     *	Optionally produce warnings if the wire or gobble
     *	counts exceed some threshold.
     */
#if VM_PAGE_WIRE_COUNT_WARNING
    if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
        printf("mk: vm_page_grab(): high wired page count of %d\n",
               vm_page_wire_count);
    }
#endif
#if VM_PAGE_GOBBLE_COUNT_WARNING
    if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
        printf("mk: vm_page_grab(): high gobbled page count of %d\n",
               vm_page_gobble_count);
    }
#endif

    lck_mtx_lock_spin(&vm_page_queue_free_lock);

    /*
     *	Only let privileged threads (involved in pageout)
     *	dip into the reserved pool.
     */
    if ((vm_page_free_count < vm_page_free_reserved) &&
        !(current_thread()->options & TH_OPT_VMPRIV)) {
        lck_mtx_unlock(&vm_page_queue_free_lock);
        mem = VM_PAGE_NULL;
    }
    else {
        vm_page_t       head;
        vm_page_t       tail;
        unsigned int    pages_to_steal;
        unsigned int    color;

        while ( vm_page_free_count == 0 ) {

            lck_mtx_unlock(&vm_page_queue_free_lock);
            /*
             * must be a privileged thread to be
             * in this state since a non-privileged
             * thread would have bailed if we were
             * under the vm_page_free_reserved mark
             */
            vm_page_wait(THREAD_UNINT);
            lck_mtx_lock_spin(&vm_page_queue_free_lock);
        }

        disable_preemption();

        if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
            lck_mtx_unlock(&vm_page_queue_free_lock);

            /*
             * we got preempted and moved to another processor
             * or we got preempted and someone else ran and filled the cache
             */
            goto return_page_from_cpu_list;
        }
        if (vm_page_free_count <= vm_page_free_reserved)
            pages_to_steal = 1;
        else {
            if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
                pages_to_steal = vm_free_magazine_refill_limit;
            else
                pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
        }
        color = PROCESSOR_DATA(current_processor(), start_color);
        head = tail = NULL;

        vm_page_free_count -= pages_to_steal;

        while (pages_to_steal--) {

            while (queue_empty(&vm_page_queue_free[color]))
                color = (color + 1) & vm_color_mask;

            queue_remove_first(&vm_page_queue_free[color],
                               mem,
                               vm_page_t,
                               pageq);
            mem->pageq.next = NULL;
            mem->pageq.prev = NULL;

            assert(!mem->active);
            assert(!mem->inactive);
            assert(!mem->throttled);
            assert(!mem->speculative);

            color = (color + 1) & vm_color_mask;

            if (head == NULL)
                head = mem;
            else
                tail->pageq.next = (queue_t)mem;
            tail = mem;

            assert(mem->listq.next == NULL && mem->listq.prev == NULL);
            assert(mem->tabled == FALSE);
            assert(mem->object == VM_OBJECT_NULL);
            assert(!mem->laundry);

            assert(pmap_verify_free(mem->phys_page));
            assert(!mem->encrypted);
            assert(!mem->pmapped);
            assert(!mem->wpmapped);
            assert(!pmap_is_noencrypt(mem->phys_page));
        }
        lck_mtx_unlock(&vm_page_queue_free_lock);

        PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
        PROCESSOR_DATA(current_processor(), start_color) = color;

        /*
         * satisfy this request
         */
        PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
        mem = head;
        mem->pageq.next = NULL;

        enable_preemption();
    }
    /*
     *	Decide if we should poke the pageout daemon.
     *	We do this if the free count is less than the low
     *	water mark, or if the free count is less than the high
     *	water mark (but above the low water mark) and the inactive
     *	count is less than its target.
     *
     *	We don't have the counts locked ... if they change a little,
     *	it doesn't really matter.
     */
    if ((vm_page_free_count < vm_page_free_min) ||
        ((vm_page_free_count < vm_page_free_target) &&
         ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
        thread_wakeup((event_t) &vm_page_free_wanted);

    VM_CHECK_MEMORYSTATUS;

//  dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);	/* (TEST/DEBUG) */

    return mem;
}
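
/*
 * Illustrative sketch (not part of the original source): the per-cpu
 * "magazine" scheme used by vm_page_grab() above -- try the local cache with
 * preemption disabled, and only on a miss take the global lock, re-check the
 * cache, then move a small batch over.  per_cpu_cache_pop(), global_pool_lock
 * and refill_this_cpu_from_global() are hypothetical placeholder names.
 * Kept under #if 0 so it is never compiled.
 */
#if 0
static page_t *
cached_page_grab(void)
{
    page_t  *p;

    disable_preemption();
    if ((p = per_cpu_cache_pop()) != NULL) {
        enable_preemption();
        return p;                           /* fast path: no global lock taken */
    }
    enable_preemption();

    lock(&global_pool_lock);
    disable_preemption();
    if ((p = per_cpu_cache_pop()) != NULL) {
        /* we were preempted and someone refilled this cpu's cache */
        unlock(&global_pool_lock);
        enable_preemption();
        return p;
    }
    refill_this_cpu_from_global();          /* move a batch, keep one for us */
    p = per_cpu_cache_pop();
    unlock(&global_pool_lock);
    enable_preemption();
    return p;                               /* NULL only if the global pool was empty */
}
#endif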
/*
 *	vm_page_release:
 *
 *	Return a page to the free list.
 */
void
vm_page_release(
    register vm_page_t  mem)
{
    unsigned int    color;
    int need_wakeup = 0;
    int need_priv_wakeup = 0;

    assert(!mem->private && !mem->fictitious);
    if (vm_page_free_verify) {
        assert(pmap_verify_free(mem->phys_page));
    }
//  dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */

    pmap_clear_noencrypt(mem->phys_page);

    lck_mtx_lock_spin(&vm_page_queue_free_lock);
#if DEBUG
    if (mem->free)
        panic("vm_page_release");
#endif

    assert(!mem->laundry);
    assert(mem->object == VM_OBJECT_NULL);
    assert(mem->pageq.next == NULL &&
           mem->pageq.prev == NULL);
    assert(mem->listq.next == NULL &&
           mem->listq.prev == NULL);

    if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
        vm_lopage_free_count < vm_lopage_free_limit &&
        mem->phys_page < max_valid_low_ppnum) {
        /*
         * this exists to support hardware controllers
         * incapable of generating DMAs with more than 32 bits
         * of address on platforms with physical memory > 4G...
         */
        queue_enter_first(&vm_lopage_queue_free,
                          mem,
                          vm_page_t,
                          pageq);
        vm_lopage_free_count++;

        if (vm_lopage_free_count >= vm_lopage_free_limit)
            vm_lopage_refill = FALSE;
    } else {
        mem->lopage = FALSE;

        color = mem->phys_page & vm_color_mask;
        queue_enter_first(&vm_page_queue_free[color],
                          mem,
                          vm_page_t,
                          pageq);
        vm_page_free_count++;
        /*
         *	Check if we should wake up someone waiting for page.
         *	But don't bother waking them unless they can allocate.
         *
         *	We wakeup only one thread, to prevent starvation.
         *	Because the scheduling system handles wait queues FIFO,
         *	if we wakeup all waiting threads, one greedy thread
         *	can starve multiple niceguy threads.  When the threads
         *	all wakeup, the greedy threads runs first, grabs the page,
         *	and waits for another page.  It will be the first to run
         *	when the next page is freed.
         *
         *	However, there is a slight danger here.
         *	The thread we wake might not use the free page.
         *	Then the other threads could wait indefinitely
         *	while the page goes unused.  To forestall this,
         *	the pageout daemon will keep making free pages
         *	as long as vm_page_free_wanted is non-zero.
         */

        assert(vm_page_free_count > 0);
        if (vm_page_free_wanted_privileged > 0) {
            vm_page_free_wanted_privileged--;
            need_priv_wakeup = 1;
        } else if (vm_page_free_wanted > 0 &&
                   vm_page_free_count > vm_page_free_reserved) {
            vm_page_free_wanted--;
            need_wakeup = 1;
        }
    }
    lck_mtx_unlock(&vm_page_queue_free_lock);

    if (need_priv_wakeup)
        thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
    else if (need_wakeup)
        thread_wakeup_one((event_t) &vm_page_free_count);

    VM_CHECK_MEMORYSTATUS;
}
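
/*
 * Illustrative sketch (not part of the original source): the "wake exactly
 * one waiter per freed resource" policy described in the comment above,
 * with the waiter count checked under the same lock that protects the
 * resource count.  resource_lock, resource_count and waiter_count are
 * hypothetical placeholder names.  Kept under #if 0 so it is never compiled.
 */
#if 0
static void
resource_release_one(void)
{
    int do_wakeup = 0;

    lock(&resource_lock);
    resource_count++;
    if (waiter_count > 0) {
        waiter_count--;                     /* hand this unit to a single waiter */
        do_wakeup = 1;
    }
    unlock(&resource_lock);

    if (do_wakeup)
        thread_wakeup_one((event_t) &resource_count);   /* not wakeup-all */
}
#endif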
/*
 * This version of vm_page_release() is used only at startup
 * when we are single-threaded and pages are being released
 * for the first time. Hence, no locking or unnecessary checks are made.
 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
 */
void
vm_page_release_startup(
    register vm_page_t  mem)
{
    queue_t queue_free;

    if (vm_lopage_free_count < vm_lopage_free_limit &&
        mem->phys_page < max_valid_low_ppnum) {
        vm_lopage_free_count++;
        queue_free = &vm_lopage_queue_free;
    } else {
        mem->lopage = FALSE;
        vm_page_free_count++;
        queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
    }
    queue_enter_first(queue_free, mem, vm_page_t, pageq);
}
/*
 *	vm_page_wait:
 *
 *	Wait for a page to become available.
 *	If there are plenty of free pages, then we don't sleep.
 *
 *	Returns:
 *		TRUE:  There may be another page, try again
 *		FALSE: We were interrupted out of our wait, don't try again
 */
boolean_t
vm_page_wait(
    int interruptible )
{
    /*
     *	We can't use vm_page_free_reserved to make this
     *	determination.  Consider: some thread might
     *	need to allocate two pages.  The first allocation
     *	succeeds, the second fails.  After the first page is freed,
     *	a call to vm_page_wait must really block.
     */
    kern_return_t   wait_result;
    int need_wakeup = 0;
    int is_privileged = current_thread()->options & TH_OPT_VMPRIV;

    lck_mtx_lock_spin(&vm_page_queue_free_lock);

    if (is_privileged && vm_page_free_count) {
        lck_mtx_unlock(&vm_page_queue_free_lock);
        return TRUE;
    }
    if (vm_page_free_count < vm_page_free_target) {

        if (is_privileged) {
            if (vm_page_free_wanted_privileged++ == 0)
                need_wakeup = 1;
            wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
        } else {
            if (vm_page_free_wanted++ == 0)
                need_wakeup = 1;
            wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
        }
        lck_mtx_unlock(&vm_page_queue_free_lock);
        counter(c_vm_page_wait_block++);

        if (need_wakeup)
            thread_wakeup((event_t)&vm_page_free_wanted);

        if (wait_result == THREAD_WAITING) {
            VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
                           vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
            wait_result = thread_block(THREAD_CONTINUE_NULL);
            VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
        }

        return(wait_result == THREAD_AWAKENED);
    } else {
        lck_mtx_unlock(&vm_page_queue_free_lock);
        return TRUE;
    }
}
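
/*
 * Illustrative sketch (not part of the original source): the canonical
 * assert_wait()/thread_block() sequence used by vm_page_wait() above --
 * declare the intent to sleep while the lock protecting the wait condition
 * is still held, drop the lock, then actually block.  my_lock and my_event
 * are hypothetical placeholder names.  Kept under #if 0 so it is never
 * compiled.
 */
#if 0
static boolean_t
wait_for_my_event(int interruptible)
{
    kern_return_t   wait_result;

    lck_mtx_lock(&my_lock);
    wait_result = assert_wait((event_t)&my_event, interruptible);
    lck_mtx_unlock(&my_lock);

    if (wait_result == THREAD_WAITING)
        wait_result = thread_block(THREAD_CONTINUE_NULL);

    return (wait_result == THREAD_AWAKENED);
}
#endif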
/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    register vm_page_t  mem;

    vm_object_lock_assert_exclusive(object);
    mem = vm_page_grab();
    if (mem == VM_PAGE_NULL)
        return VM_PAGE_NULL;

    vm_page_insert(mem, object, offset);

    return(mem);
}
/*
 *	vm_page_alloc_guard:
 *
 *	Allocate a fictitious page which will be used
 *	as a guard page.  The page will be inserted into
 *	the object and returned to the caller.
 */
vm_page_t
vm_page_alloc_guard(
    vm_object_t         object,
    vm_object_offset_t  offset)
{
    register vm_page_t  mem;

    vm_object_lock_assert_exclusive(object);
    mem = vm_page_grab_guard();
    if (mem == VM_PAGE_NULL)
        return VM_PAGE_NULL;

    vm_page_insert(mem, object, offset);

    return(mem);
}
counter(unsigned int c_laundry_pages_freed = 0;)
/*
 *	vm_page_free_prepare:
 *
 *	Removes page from any queue it may be on
 *	and disassociates it from its VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
static void
vm_page_free_prepare(
    vm_page_t   mem)
{
    vm_page_free_prepare_queues(mem);
    vm_page_free_prepare_object(mem, TRUE);
}
void
vm_page_free_prepare_queues(
    vm_page_t   mem)
{
    assert(!mem->cleaning);

#if MACH_ASSERT || DEBUG
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
    if (mem->free)
        panic("vm_page_free: freeing page on free list\n");
#endif	/* MACH_ASSERT || DEBUG */
    if (mem->object) {
        vm_object_lock_assert_exclusive(mem->object);
    }
    if (mem->laundry) {
        /*
         * We may have to free a page while it's being laundered
         * if we lost its pager (due to a forced unmount, for example).
         * We need to call vm_pageout_steal_laundry() before removing
         * the page from its VM object, so that we can remove it
         * from its pageout queue and adjust the laundry accounting
         */
        vm_pageout_steal_laundry(mem, TRUE);
        counter(++c_laundry_pages_freed);
    }

    vm_page_queues_remove(mem);	/* clears local/active/inactive/throttled/speculative */

    if (VM_PAGE_WIRED(mem)) {

        if (mem->object) {
            assert(mem->object->wired_page_count > 0);
            mem->object->wired_page_count--;
            if (!mem->object->wired_page_count) {
                VM_OBJECT_UNWIRED(mem->object);
            }

            assert(mem->object->resident_page_count >=
                   mem->object->wired_page_count);

            if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
                OSAddAtomic(+1, &vm_page_purgeable_count);
                assert(vm_page_purgeable_wired_count > 0);
                OSAddAtomic(-1, &vm_page_purgeable_wired_count);
            }
            if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
                 mem->object->purgable == VM_PURGABLE_EMPTY) &&
                mem->object->vo_purgeable_owner != TASK_NULL) {
                task_t  owner;

                owner = mem->object->vo_purgeable_owner;
                /*
                 * While wired, this page was accounted
                 * as "non-volatile" but it should now
                 * be accounted as "volatile".
                 */
                /* one less "non-volatile"... */
                ledger_debit(owner->ledger,
                             task_ledgers.purgeable_nonvolatile,
                             PAGE_SIZE);
                /* ... and "phys_footprint" */
                ledger_debit(owner->ledger,
                             task_ledgers.phys_footprint,
                             PAGE_SIZE);
                /* one more "volatile" */
                ledger_credit(owner->ledger,
                              task_ledgers.purgeable_volatile,
                              PAGE_SIZE);
            }
        }
        if (!mem->private && !mem->fictitious)
            vm_page_wire_count--;
        mem->wire_count = 0;
        assert(!mem->gobbled);
    } else if (mem->gobbled) {
        if (!mem->private && !mem->fictitious)
            vm_page_wire_count--;
        vm_page_gobble_count--;
    }
}
void
vm_page_free_prepare_object(
    vm_page_t   mem,
    boolean_t   remove_from_hash)
{
    if (mem->tabled)
        vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */

    PAGE_WAKEUP(mem);		/* clears wanted */

    if (mem->private) {
        mem->private = FALSE;
        mem->fictitious = TRUE;
        mem->phys_page = vm_page_fictitious_addr;
    }
    if ( !mem->fictitious) {
        vm_page_init(mem, mem->phys_page, mem->lopage);
    }
}
/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it with any VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
void
vm_page_free(
    vm_page_t   mem)
{
    vm_page_free_prepare(mem);

    if (mem->fictitious) {
        vm_page_release_fictitious(mem);
    } else {
        vm_page_release(mem);
    }
}
void
vm_page_free_unlocked(
    vm_page_t   mem,
    boolean_t   remove_from_hash)
{
    vm_page_lockspin_queues();
    vm_page_free_prepare_queues(mem);
    vm_page_unlock_queues();

    vm_page_free_prepare_object(mem, remove_from_hash);

    if (mem->fictitious) {
        vm_page_release_fictitious(mem);
    } else {
        vm_page_release(mem);
    }
}
/*
 * Free a list of pages.  The list can be up to several hundred pages,
 * as blocked up by vm_pageout_scan().
 * The big win is not having to take the free list lock once
 * per page.
 */
void
vm_page_free_list(
    vm_page_t   freeq,
    boolean_t   prepare_object)
{
    vm_page_t   mem;
    vm_page_t   nxt;
    vm_page_t   local_freeq;
    int         pg_count;

    while (freeq) {

        pg_count = 0;
        local_freeq = VM_PAGE_NULL;
        mem = freeq;

        /*
         * break up the processing into smaller chunks so
         * that we can 'pipeline' the pages onto the
         * free list w/o introducing too much
         * contention on the global free queue lock
         */
        while (mem && pg_count < 64) {

            assert(!mem->inactive);
            assert(!mem->active);
            assert(!mem->throttled);
            assert(!mem->speculative);
            assert(!VM_PAGE_WIRED(mem));
            assert(mem->pageq.prev == NULL);

            nxt = (vm_page_t)(mem->pageq.next);

            if (vm_page_free_verify && !mem->fictitious && !mem->private) {
                assert(pmap_verify_free(mem->phys_page));
            }
            if (prepare_object == TRUE)
                vm_page_free_prepare_object(mem, TRUE);

            if (!mem->fictitious) {

                if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
                    vm_lopage_free_count < vm_lopage_free_limit &&
                    mem->phys_page < max_valid_low_ppnum) {
                    mem->pageq.next = NULL;
                    vm_page_release(mem);
                } else {
                    /*
                     * IMPORTANT: we can't set the page "free" here
                     * because that would make the page eligible for
                     * a physically-contiguous allocation (see
                     * vm_page_find_contiguous()) right away (we don't
                     * hold the vm_page_queue_free lock).  That would
                     * cause trouble because the page is not actually
                     * in the free queue yet...
                     */
                    mem->pageq.next = (queue_entry_t)local_freeq;
                    local_freeq = mem;
                    pg_count++;

                    pmap_clear_noencrypt(mem->phys_page);
                }
            } else {
                assert(mem->phys_page == vm_page_fictitious_addr ||
                       mem->phys_page == vm_page_guard_addr);
                vm_page_release_fictitious(mem);
            }
            mem = nxt;
        }
        freeq = mem;

        if ( (mem = local_freeq) ) {
            unsigned int    avail_free_count;
            unsigned int    need_wakeup = 0;
            unsigned int    need_priv_wakeup = 0;

            lck_mtx_lock_spin(&vm_page_queue_free_lock);

            while (mem) {
                int color;

                nxt = (vm_page_t)(mem->pageq.next);

                color = mem->phys_page & vm_color_mask;
                queue_enter_first(&vm_page_queue_free[color],
                                  mem,
                                  vm_page_t,
                                  pageq);
                mem = nxt;
            }
            vm_page_free_count += pg_count;
            avail_free_count = vm_page_free_count;

            if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {

                if (avail_free_count < vm_page_free_wanted_privileged) {
                    need_priv_wakeup = avail_free_count;
                    vm_page_free_wanted_privileged -= avail_free_count;
                    avail_free_count = 0;
                } else {
                    need_priv_wakeup = vm_page_free_wanted_privileged;
                    avail_free_count -= vm_page_free_wanted_privileged;
                    vm_page_free_wanted_privileged = 0;
                }
            }
            if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
                unsigned int  available_pages;

                available_pages = avail_free_count - vm_page_free_reserved;

                if (available_pages >= vm_page_free_wanted) {
                    need_wakeup = vm_page_free_wanted;
                    vm_page_free_wanted = 0;
                } else {
                    need_wakeup = available_pages;
                    vm_page_free_wanted -= available_pages;
                }
            }
            lck_mtx_unlock(&vm_page_queue_free_lock);

            if (need_priv_wakeup != 0) {
                /*
                 * There shouldn't be that many VM-privileged threads,
                 * so let's wake them all up, even if we don't quite
                 * have enough pages to satisfy them all.
                 */
                thread_wakeup((event_t)&vm_page_free_wanted_privileged);
            }
            if (need_wakeup != 0 && vm_page_free_wanted == 0) {
                /*
                 * We don't expect to have any more waiters
                 * after this, so let's wake them all up at
                 * once.
                 */
                thread_wakeup((event_t) &vm_page_free_count);
            } else for (; need_wakeup != 0; need_wakeup--) {
                /*
                 * Wake up one waiter per page we just released.
                 */
                thread_wakeup_one((event_t) &vm_page_free_count);
            }

            VM_CHECK_MEMORYSTATUS;
        }
    }
}
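
/*
 * Illustrative sketch (not part of the original source): the batching idea
 * used by vm_page_free_list() above -- accumulate items on a private list
 * and take the global lock once per chunk of BATCH items instead of once per
 * item.  item_t, global_lock, global_push() and global_count are
 * hypothetical placeholder names.  Kept under #if 0 so it is never compiled.
 */
#if 0
#define BATCH	64

static void
free_item_list(item_t *list)
{
    while (list) {
        item_t  *batch_head = NULL;
        int      n = 0;

        while (list && n < BATCH) {         /* carve off one chunk */
            item_t  *next = list->next;

            list->next = batch_head;
            batch_head = list;
            list = next;
            n++;
        }
        lock(&global_lock);                 /* one lock round-trip per chunk */
        while (batch_head) {
            item_t  *next = batch_head->next;

            global_push(batch_head);
            batch_head = next;
        }
        global_count += n;
        unlock(&global_lock);
    }
}
#endif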
/*
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page's object and the page queues must be locked.
 */
2974 register vm_page_t mem
,
2976 boolean_t check_memorystatus
)
2979 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2983 vm_object_lock_assert_exclusive(mem
->object
);
    /*
     * In theory, the page should be in an object before it
     * gets wired, since we need to hold the object lock
     * to update some fields in the page structure.
     * However, some code (i386 pmap, for example) might want
     * to wire a page before it gets inserted into an object.
     * That's somewhat OK, as long as nobody else can get to
     * that page and update it at the same time.
     */
2996 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2998 if ( !VM_PAGE_WIRED(mem
)) {
3000 if (mem
->pageout_queue
) {
3001 mem
->pageout
= FALSE
;
3002 vm_pageout_throttle_up(mem
);
3004 vm_page_queues_remove(mem
);
3008 if (!mem
->private && !mem
->fictitious
)
3010 if (!mem
->object
->wired_page_count
)
3012 assert(VM_KERN_MEMORY_NONE
!= tag
);
3013 mem
->object
->wire_tag
= tag
;
3014 VM_OBJECT_WIRED(mem
->object
);
3017 mem
->object
->wired_page_count
++;
3019 assert(mem
->object
->resident_page_count
>=
3020 mem
->object
->wired_page_count
);
3021 if (mem
->object
->purgable
== VM_PURGABLE_VOLATILE
) {
3022 assert(vm_page_purgeable_count
> 0);
3023 OSAddAtomic(-1, &vm_page_purgeable_count
);
3024 OSAddAtomic(1, &vm_page_purgeable_wired_count
);
3026 if ((mem
->object
->purgable
== VM_PURGABLE_VOLATILE
||
3027 mem
->object
->purgable
== VM_PURGABLE_EMPTY
) &&
3028 mem
->object
->vo_purgeable_owner
!= TASK_NULL
) {
3031 owner
= mem
->object
->vo_purgeable_owner
;
3032 /* less volatile bytes */
3033 ledger_debit(owner
->ledger
,
3034 task_ledgers
.purgeable_volatile
,
3036 /* more not-quite-volatile bytes */
3037 ledger_credit(owner
->ledger
,
3038 task_ledgers
.purgeable_nonvolatile
,
3040 /* more footprint */
3041 ledger_credit(owner
->ledger
,
3042 task_ledgers
.phys_footprint
,
3045 if (mem
->object
->all_reusable
) {
3047 * Wired pages are not counted as "re-usable"
3048 * in "all_reusable" VM objects, so nothing
3051 } else if (mem
->reusable
) {
3053 * This page is not "re-usable" when it's
3054 * wired, so adjust its state and the
3057 vm_object_reuse_pages(mem
->object
,
3059 mem
->offset
+PAGE_SIZE_64
,
3063 assert(!mem
->reusable
);
3065 if (!mem
->private && !mem
->fictitious
&& !mem
->gobbled
)
3066 vm_page_wire_count
++;
3068 vm_page_gobble_count
--;
3069 mem
->gobbled
= FALSE
;
3071 if (check_memorystatus
== TRUE
) {
3072 VM_CHECK_MEMORYSTATUS
;
    /*
     * The page could be encrypted, but
     * we don't have to decrypt it here
     * because we don't guarantee that the
     * data is actually valid at this point.
     * The page will get decrypted in
     * vm_fault_wire() if needed.
     */
3084 assert(!mem
->gobbled
);
/*
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page's object and the page queues must be locked.
 */
3103 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
3106 assert(VM_PAGE_WIRED(mem
));
3107 assert(!mem
->gobbled
);
3108 assert(mem
->object
!= VM_OBJECT_NULL
);
3110 vm_object_lock_assert_exclusive(mem
->object
);
3111 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
3113 if (--mem
->wire_count
== 0) {
3114 if (!mem
->private && !mem
->fictitious
) {
3115 vm_page_wire_count
--;
3117 assert(mem
->object
->wired_page_count
> 0);
3118 mem
->object
->wired_page_count
--;
3119 if (!mem
->object
->wired_page_count
) {
3120 VM_OBJECT_UNWIRED(mem
->object
);
3122 assert(mem
->object
->resident_page_count
>=
3123 mem
->object
->wired_page_count
);
3124 if (mem
->object
->purgable
== VM_PURGABLE_VOLATILE
) {
3125 OSAddAtomic(+1, &vm_page_purgeable_count
);
3126 assert(vm_page_purgeable_wired_count
> 0);
3127 OSAddAtomic(-1, &vm_page_purgeable_wired_count
);
3129 if ((mem
->object
->purgable
== VM_PURGABLE_VOLATILE
||
3130 mem
->object
->purgable
== VM_PURGABLE_EMPTY
) &&
3131 mem
->object
->vo_purgeable_owner
!= TASK_NULL
) {
3134 owner
= mem
->object
->vo_purgeable_owner
;
3135 /* more volatile bytes */
3136 ledger_credit(owner
->ledger
,
3137 task_ledgers
.purgeable_volatile
,
3139 /* less not-quite-volatile bytes */
3140 ledger_debit(owner
->ledger
,
3141 task_ledgers
.purgeable_nonvolatile
,
3143 /* less footprint */
3144 ledger_debit(owner
->ledger
,
3145 task_ledgers
.phys_footprint
,
3148 assert(mem
->object
!= kernel_object
);
3149 assert(mem
->pageq
.next
== NULL
&& mem
->pageq
.prev
== NULL
);
3151 if (queueit
== TRUE
) {
3152 if (mem
->object
->purgable
== VM_PURGABLE_EMPTY
) {
3153 vm_page_deactivate(mem
);
3155 vm_page_activate(mem
);
3159 VM_CHECK_MEMORYSTATUS
;
/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
3178 vm_page_deactivate_internal(m
, TRUE
);
3183 vm_page_deactivate_internal(
3185 boolean_t clear_hw_reference
)
3189 assert(m
->object
!= kernel_object
);
3190 assert(m
->phys_page
!= vm_page_guard_addr
);
3192 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
3194 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
    /*
     *	This page is no longer very interesting.  If it was
     *	interesting (active or inactive/referenced), then we
     *	clear the reference bit and (re)enter it in the
     *	inactive queue.  Note wired pages should not have
     *	their reference bit cleared.
     */
3203 assert ( !(m
->absent
&& !m
->unusual
));
3205 if (m
->gobbled
) { /* can this happen? */
3206 assert( !VM_PAGE_WIRED(m
));
3208 if (!m
->private && !m
->fictitious
)
3209 vm_page_wire_count
--;
3210 vm_page_gobble_count
--;
    /*
     * if this page is currently on the pageout queue, we can't do the
     * vm_page_queues_remove (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
3221 if (m
->laundry
|| m
->pageout_queue
|| m
->private || m
->fictitious
|| m
->compressor
|| (VM_PAGE_WIRED(m
)))
3224 if (!m
->absent
&& clear_hw_reference
== TRUE
)
3225 pmap_clear_reference(m
->phys_page
);
3227 m
->reference
= FALSE
;
3228 m
->no_cache
= FALSE
;
3231 vm_page_queues_remove(m
);
3233 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default
) &&
3234 m
->dirty
&& m
->object
->internal
&&
3235 (m
->object
->purgable
== VM_PURGABLE_DENY
||
3236 m
->object
->purgable
== VM_PURGABLE_NONVOLATILE
||
3237 m
->object
->purgable
== VM_PURGABLE_VOLATILE
)) {
3238 vm_page_check_pageable_safe(m
);
3239 queue_enter(&vm_page_queue_throttled
, m
, vm_page_t
, pageq
);
3240 m
->throttled
= TRUE
;
3241 vm_page_throttled_count
++;
3243 if (m
->object
->named
&& m
->object
->ref_count
== 1) {
3244 vm_page_speculate(m
, FALSE
);
3245 #if DEVELOPMENT || DEBUG
3246 vm_page_speculative_recreated
++;
3249 vm_page_enqueue_inactive(m
, FALSE
);
/*
 * vm_page_enqueue_cleaned
 *
 * Put the page on the cleaned queue, mark it cleaned, etc.
 * Being on the cleaned queue (and having m->clean_queue set)
 * does ** NOT ** guarantee that the page is clean!
 *
 * Call with the queues lock held.
 */
3265 void vm_page_enqueue_cleaned(vm_page_t m
)
3267 assert(m
->phys_page
!= vm_page_guard_addr
);
3269 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
3271 assert( !(m
->absent
&& !m
->unusual
));
3274 assert( !VM_PAGE_WIRED(m
));
3275 if (!m
->private && !m
->fictitious
)
3276 vm_page_wire_count
--;
3277 vm_page_gobble_count
--;
    /*
     * if this page is currently on the pageout queue, we can't do the
     * vm_page_queues_remove (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
3288 if (m
->laundry
|| m
->clean_queue
|| m
->pageout_queue
|| m
->private || m
->fictitious
)
3291 vm_page_queues_remove(m
);
3293 vm_page_check_pageable_safe(m
);
3294 queue_enter(&vm_page_queue_cleaned
, m
, vm_page_t
, pageq
);
3295 m
->clean_queue
= TRUE
;
3296 vm_page_cleaned_count
++;
3299 vm_page_inactive_count
++;
3300 if (m
->object
->internal
) {
3301 vm_page_pageable_internal_count
++;
3303 vm_page_pageable_external_count
++;
3306 vm_pageout_enqueued_cleaned
++;
/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */
3319 register vm_page_t m
)
3322 #ifdef FIXME_4778297
3323 assert(m
->object
!= kernel_object
);
3325 assert(m
->phys_page
!= vm_page_guard_addr
);
3327 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
3329 assert( !(m
->absent
&& !m
->unusual
));
3332 assert( !VM_PAGE_WIRED(m
));
3333 if (!m
->private && !m
->fictitious
)
3334 vm_page_wire_count
--;
3335 vm_page_gobble_count
--;
    /*
     * if this page is currently on the pageout queue, we can't do the
     * vm_page_queues_remove (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
3346 if (m
->laundry
|| m
->pageout_queue
|| m
->private || m
->fictitious
|| m
->compressor
)
3351 panic("vm_page_activate: already active");
3354 if (m
->speculative
) {
3355 DTRACE_VM2(pgrec
, int, 1, (uint64_t *), NULL
);
3356 DTRACE_VM2(pgfrec
, int, 1, (uint64_t *), NULL
);
3359 vm_page_queues_remove(m
);
3361 if ( !VM_PAGE_WIRED(m
)) {
3362 vm_page_check_pageable_safe(m
);
3363 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default
) &&
3364 m
->dirty
&& m
->object
->internal
&&
3365 (m
->object
->purgable
== VM_PURGABLE_DENY
||
3366 m
->object
->purgable
== VM_PURGABLE_NONVOLATILE
||
3367 m
->object
->purgable
== VM_PURGABLE_VOLATILE
)) {
3368 queue_enter(&vm_page_queue_throttled
, m
, vm_page_t
, pageq
);
3369 m
->throttled
= TRUE
;
3370 vm_page_throttled_count
++;
3372 queue_enter(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
3374 vm_page_active_count
++;
3375 if (m
->object
->internal
) {
3376 vm_page_pageable_internal_count
++;
3378 vm_page_pageable_external_count
++;
3381 m
->reference
= TRUE
;
3382 m
->no_cache
= FALSE
;
/*
 *	vm_page_speculate:
 *
 *	Put the specified page on the speculative list (if appropriate).
 *
 *	The page queues must be locked.
 */
3400 struct vm_speculative_age_q
*aq
;
3403 vm_page_check_pageable_safe(m
);
3405 assert(m
->phys_page
!= vm_page_guard_addr
);
3407 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
3409 assert( !(m
->absent
&& !m
->unusual
));
    /*
     * if this page is currently on the pageout queue, we can't do the
     * vm_page_queues_remove (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
3419 if (m
->laundry
|| m
->pageout_queue
|| m
->private || m
->fictitious
|| m
->compressor
)
3422 vm_page_queues_remove(m
);
3424 if ( !VM_PAGE_WIRED(m
)) {
3429 clock_get_system_nanotime(&sec
, &nsec
);
3430 ts
.tv_sec
= (unsigned int) sec
;
3433 if (vm_page_speculative_count
== 0) {
3435 speculative_age_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
3436 speculative_steal_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
3438 aq
= &vm_page_queue_speculative
[speculative_age_index
];
3441 * set the timer to begin a new group
3443 aq
->age_ts
.tv_sec
= vm_page_speculative_q_age_ms
/ 1000;
3444 aq
->age_ts
.tv_nsec
= (vm_page_speculative_q_age_ms
% 1000) * 1000 * NSEC_PER_USEC
;
3446 ADD_MACH_TIMESPEC(&aq
->age_ts
, &ts
);
3448 aq
= &vm_page_queue_speculative
[speculative_age_index
];
3450 if (CMP_MACH_TIMESPEC(&ts
, &aq
->age_ts
) >= 0) {
3452 speculative_age_index
++;
3454 if (speculative_age_index
> VM_PAGE_MAX_SPECULATIVE_AGE_Q
)
3455 speculative_age_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
3456 if (speculative_age_index
== speculative_steal_index
) {
3457 speculative_steal_index
= speculative_age_index
+ 1;
3459 if (speculative_steal_index
> VM_PAGE_MAX_SPECULATIVE_AGE_Q
)
3460 speculative_steal_index
= VM_PAGE_MIN_SPECULATIVE_AGE_Q
;
3462 aq
= &vm_page_queue_speculative
[speculative_age_index
];
3464 if (!queue_empty(&aq
->age_q
))
3465 vm_page_speculate_ageit(aq
);
3467 aq
->age_ts
.tv_sec
= vm_page_speculative_q_age_ms
/ 1000;
3468 aq
->age_ts
.tv_nsec
= (vm_page_speculative_q_age_ms
% 1000) * 1000 * NSEC_PER_USEC
;
3470 ADD_MACH_TIMESPEC(&aq
->age_ts
, &ts
);
3473 enqueue_tail(&aq
->age_q
, &m
->pageq
);
3474 m
->speculative
= TRUE
;
3475 vm_page_speculative_count
++;
3476 if (m
->object
->internal
) {
3477 vm_page_pageable_internal_count
++;
3479 vm_page_pageable_external_count
++;
3483 vm_object_lock_assert_exclusive(m
->object
);
3485 m
->object
->pages_created
++;
3486 #if DEVELOPMENT || DEBUG
3487 vm_page_speculative_created
++;
/*
 * move pages from the specified aging bin to
 * the speculative bin that pageout_scan claims from
 *
 * The page queues must be locked.
 */
3502 vm_page_speculate_ageit(struct vm_speculative_age_q
*aq
)
3504 struct vm_speculative_age_q
*sq
;
3507 sq
= &vm_page_queue_speculative
[VM_PAGE_SPECULATIVE_AGED_Q
];
3509 if (queue_empty(&sq
->age_q
)) {
3510 sq
->age_q
.next
= aq
->age_q
.next
;
3511 sq
->age_q
.prev
= aq
->age_q
.prev
;
3513 t
= (vm_page_t
)sq
->age_q
.next
;
3514 t
->pageq
.prev
= &sq
->age_q
;
3516 t
= (vm_page_t
)sq
->age_q
.prev
;
3517 t
->pageq
.next
= &sq
->age_q
;
3519 t
= (vm_page_t
)sq
->age_q
.prev
;
3520 t
->pageq
.next
= aq
->age_q
.next
;
3522 t
= (vm_page_t
)aq
->age_q
.next
;
3523 t
->pageq
.prev
= sq
->age_q
.prev
;
3525 t
= (vm_page_t
)aq
->age_q
.prev
;
3526 t
->pageq
.next
= &sq
->age_q
;
3528 sq
->age_q
.prev
= aq
->age_q
.prev
;
3530 queue_init(&aq
->age_q
);
3539 assert(m
->object
!= kernel_object
);
3540 assert(m
->phys_page
!= vm_page_guard_addr
);
3543 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
    /*
     * if this page is currently on the pageout queue, we can't do the
     * vm_page_queues_remove (which doesn't handle the pageout queue case)
     * and we can't remove it manually since we would need the object lock
     * (which is not required here) to decrement the activity_in_progress
     * reference which is held on the object while the page is in the pageout queue...
     * just let the normal laundry processing proceed
     */
3553 if (m
->laundry
|| m
->pageout_queue
|| m
->private || m
->compressor
|| (VM_PAGE_WIRED(m
)))
3556 m
->no_cache
= FALSE
;
3558 vm_page_queues_remove(m
);
3560 vm_page_enqueue_inactive(m
, FALSE
);
3565 vm_page_reactivate_all_throttled(void)
3567 vm_page_t first_throttled
, last_throttled
;
3568 vm_page_t first_active
;
3570 int extra_active_count
;
3571 int extra_internal_count
, extra_external_count
;
3573 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default
))
3576 extra_active_count
= 0;
3577 extra_internal_count
= 0;
3578 extra_external_count
= 0;
3579 vm_page_lock_queues();
3580 if (! queue_empty(&vm_page_queue_throttled
)) {
3582 * Switch "throttled" pages to "active".
3584 queue_iterate(&vm_page_queue_throttled
, m
, vm_page_t
, pageq
) {
3586 assert(m
->throttled
);
3588 assert(!m
->inactive
);
3589 assert(!m
->speculative
);
3590 assert(!VM_PAGE_WIRED(m
));
3592 extra_active_count
++;
3593 if (m
->object
->internal
) {
3594 extra_internal_count
++;
3596 extra_external_count
++;
3599 m
->throttled
= FALSE
;
        /*
         * Transfer the entire throttled queue to the regular LRU page queues.
         * We insert it at the head of the active queue, so that these pages
         * get re-evaluated by the LRU algorithm first, since they've been
         * completely out of it until now.
         */
3610 first_throttled
= (vm_page_t
) queue_first(&vm_page_queue_throttled
);
3611 last_throttled
= (vm_page_t
) queue_last(&vm_page_queue_throttled
);
3612 first_active
= (vm_page_t
) queue_first(&vm_page_queue_active
);
3613 if (queue_empty(&vm_page_queue_active
)) {
3614 queue_last(&vm_page_queue_active
) = (queue_entry_t
) last_throttled
;
3616 queue_prev(&first_active
->pageq
) = (queue_entry_t
) last_throttled
;
3618 queue_first(&vm_page_queue_active
) = (queue_entry_t
) first_throttled
;
3619 queue_prev(&first_throttled
->pageq
) = (queue_entry_t
) &vm_page_queue_active
;
3620 queue_next(&last_throttled
->pageq
) = (queue_entry_t
) first_active
;
3623 printf("reactivated %d throttled pages\n", vm_page_throttled_count
);
3625 queue_init(&vm_page_queue_throttled
);
3627 * Adjust the global page counts.
3629 vm_page_active_count
+= extra_active_count
;
3630 vm_page_pageable_internal_count
+= extra_internal_count
;
3631 vm_page_pageable_external_count
+= extra_external_count
;
3632 vm_page_throttled_count
= 0;
3634 assert(vm_page_throttled_count
== 0);
3635 assert(queue_empty(&vm_page_queue_throttled
));
3636 vm_page_unlock_queues();
/*
 * move pages from the indicated local queue to the global active queue
 * it's ok to fail if we're below the hard limit and force == FALSE
 * the nolocks == TRUE case is to allow this function to be run on
 * the hibernate path
 */
3648 vm_page_reactivate_local(uint32_t lid
, boolean_t force
, boolean_t nolocks
)
3651 vm_page_t first_local
, last_local
;
3652 vm_page_t first_active
;
3656 if (vm_page_local_q
== NULL
)
3659 lq
= &vm_page_local_q
[lid
].vpl_un
.vpl
;
3661 if (nolocks
== FALSE
) {
3662 if (lq
->vpl_count
< vm_page_local_q_hard_limit
&& force
== FALSE
) {
3663 if ( !vm_page_trylockspin_queues())
3666 vm_page_lockspin_queues();
3668 VPL_LOCK(&lq
->vpl_lock
);
3670 if (lq
->vpl_count
) {
3672 * Switch "local" pages to "active".
3674 assert(!queue_empty(&lq
->vpl_queue
));
3676 queue_iterate(&lq
->vpl_queue
, m
, vm_page_t
, pageq
) {
3678 vm_page_check_pageable_safe(m
);
3681 assert(!m
->inactive
);
3682 assert(!m
->speculative
);
3683 assert(!VM_PAGE_WIRED(m
));
3684 assert(!m
->throttled
);
3685 assert(!m
->fictitious
);
3687 if (m
->local_id
!= lid
)
3688 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m
);
3697 if (count
!= lq
->vpl_count
)
3698 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count
, lq
->vpl_count
);
3701 * Transfer the entire local queue to a regular LRU page queues.
3703 first_local
= (vm_page_t
) queue_first(&lq
->vpl_queue
);
3704 last_local
= (vm_page_t
) queue_last(&lq
->vpl_queue
);
3705 first_active
= (vm_page_t
) queue_first(&vm_page_queue_active
);
3707 if (queue_empty(&vm_page_queue_active
)) {
3708 queue_last(&vm_page_queue_active
) = (queue_entry_t
) last_local
;
3710 queue_prev(&first_active
->pageq
) = (queue_entry_t
) last_local
;
3712 queue_first(&vm_page_queue_active
) = (queue_entry_t
) first_local
;
3713 queue_prev(&first_local
->pageq
) = (queue_entry_t
) &vm_page_queue_active
;
3714 queue_next(&last_local
->pageq
) = (queue_entry_t
) first_active
;
3716 queue_init(&lq
->vpl_queue
);
3718 * Adjust the global page counts.
3720 vm_page_active_count
+= lq
->vpl_count
;
3721 vm_page_pageable_internal_count
+= lq
->vpl_internal_count
;
3722 vm_page_pageable_external_count
+= lq
->vpl_external_count
;
3724 lq
->vpl_internal_count
= 0;
3725 lq
->vpl_external_count
= 0;
3727 assert(queue_empty(&lq
->vpl_queue
));
3729 if (nolocks
== FALSE
) {
3730 VPL_UNLOCK(&lq
->vpl_lock
);
3731 vm_page_unlock_queues();
/*
 *	vm_page_part_zero_fill:
 *
 *	Zero-fill a part of the page.
 */
#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3742 vm_page_part_zero_fill(
3750 * we don't hold the page queue lock
3751 * so this check isn't safe to make
3756 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3757 pmap_zero_part_page(m
->phys_page
, m_pa
, len
);
3761 tmp
= vm_page_grab();
3762 if (tmp
== VM_PAGE_NULL
) {
3763 vm_page_wait(THREAD_UNINT
);
3768 vm_page_zero_fill(tmp
);
3770 vm_page_part_copy(m
, 0, tmp
, 0, m_pa
);
3772 if((m_pa
+ len
) < PAGE_SIZE
) {
3773 vm_page_part_copy(m
, m_pa
+ len
, tmp
,
3774 m_pa
+ len
, PAGE_SIZE
- (m_pa
+ len
));
3776 vm_page_copy(tmp
,m
);
/*
 *	vm_page_zero_fill:
 *
 *	Zero-fill the specified page.
 */
3792 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3793 m
->object
, m
->offset
, m
, 0,0);
3796 * we don't hold the page queue lock
3797 * so this check isn't safe to make
3802 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3803 pmap_zero_page(m
->phys_page
);
/*
 *	vm_page_part_copy:
 *
 *	copy part of one page to another
 */
3822 * we don't hold the page queue lock
3823 * so this check isn't safe to make
3825 VM_PAGE_CHECK(src_m
);
3826 VM_PAGE_CHECK(dst_m
);
3828 pmap_copy_part_page(src_m
->phys_page
, src_pa
,
3829 dst_m
->phys_page
, dst_pa
, len
);
/*
 *	Copy one page to another
 *
 *	The source page should not be encrypted.  The caller should
 *	make sure the page is decrypted first, if necessary.
 */
3842 int vm_page_copy_cs_validations
= 0;
3843 int vm_page_copy_cs_tainted
= 0;
3851 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3852 src_m
->object
, src_m
->offset
,
3853 dest_m
->object
, dest_m
->offset
,
3857 * we don't hold the page queue lock
3858 * so this check isn't safe to make
3860 VM_PAGE_CHECK(src_m
);
3861 VM_PAGE_CHECK(dest_m
);
3863 vm_object_lock_assert_held(src_m
->object
);
3867 * The source page should not be encrypted at this point.
3868 * The destination page will therefore not contain encrypted
3869 * data after the copy.
3871 if (src_m
->encrypted
) {
3872 panic("vm_page_copy: source page %p is encrypted\n", src_m
);
3874 dest_m
->encrypted
= FALSE
;
3876 if (src_m
->object
!= VM_OBJECT_NULL
&&
3877 src_m
->object
->code_signed
) {
3879 * We're copying a page from a code-signed object.
3880 * Whoever ends up mapping the copy page might care about
3881 * the original page's integrity, so let's validate the
3884 vm_page_copy_cs_validations
++;
3885 vm_page_validate_cs(src_m
);
3888 if (vm_page_is_slideable(src_m
)) {
3889 boolean_t was_busy
= src_m
->busy
;
3891 (void) vm_page_slide(src_m
, 0);
3892 assert(src_m
->busy
);
3894 PAGE_WAKEUP_DONE(src_m
);
3899 * Propagate the cs_tainted bit to the copy page. Do not propagate
3900 * the cs_validated bit.
3902 dest_m
->cs_tainted
= src_m
->cs_tainted
;
3903 if (dest_m
->cs_tainted
) {
3904 vm_page_copy_cs_tainted
++;
3906 dest_m
->slid
= src_m
->slid
;
3907 dest_m
->error
= src_m
->error
; /* sliding src_m might have failed... */
3908 pmap_copy_page(src_m
->phys_page
, dest_m
->phys_page
);
3916 printf("vm_page %p: \n", p
);
3917 printf(" pageq: next=%p prev=%p\n", p
->pageq
.next
, p
->pageq
.prev
);
3918 printf(" listq: next=%p prev=%p\n", p
->listq
.next
, p
->listq
.prev
);
3919 printf(" next=%p\n", VM_PAGE_UNPACK_PTR(p
->next_m
));
3920 printf(" object=%p offset=0x%llx\n", p
->object
, p
->offset
);
3921 printf(" wire_count=%u\n", p
->wire_count
);
3923 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3924 (p
->local
? "" : "!"),
3925 (p
->inactive
? "" : "!"),
3926 (p
->active
? "" : "!"),
3927 (p
->pageout_queue
? "" : "!"),
3928 (p
->speculative
? "" : "!"),
3929 (p
->laundry
? "" : "!"));
3930 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3931 (p
->free
? "" : "!"),
3932 (p
->reference
? "" : "!"),
3933 (p
->gobbled
? "" : "!"),
3934 (p
->private ? "" : "!"),
3935 (p
->throttled
? "" : "!"));
3936 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3937 (p
->busy
? "" : "!"),
3938 (p
->wanted
? "" : "!"),
3939 (p
->tabled
? "" : "!"),
3940 (p
->fictitious
? "" : "!"),
3941 (p
->pmapped
? "" : "!"),
3942 (p
->wpmapped
? "" : "!"));
3943 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3944 (p
->pageout
? "" : "!"),
3945 (p
->absent
? "" : "!"),
3946 (p
->error
? "" : "!"),
3947 (p
->dirty
? "" : "!"),
3948 (p
->cleaning
? "" : "!"),
3949 (p
->precious
? "" : "!"),
3950 (p
->clustered
? "" : "!"));
3951 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3952 (p
->overwriting
? "" : "!"),
3953 (p
->restart
? "" : "!"),
3954 (p
->unusual
? "" : "!"),
3955 (p
->encrypted
? "" : "!"),
3956 (p
->encrypted_cleaning
? "" : "!"));
3957 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
3958 (p
->cs_validated
? "" : "!"),
3959 (p
->cs_tainted
? "" : "!"),
3960 (p
->cs_nx
? "" : "!"),
3961 (p
->no_cache
? "" : "!"));
3963 printf("phys_page=0x%x\n", p
->phys_page
);
/*
 *	Check that the list of pages is ordered by
 *	ascending physical address and has no holes.
 */
3971 vm_page_verify_contiguous(
3973 unsigned int npages
)
3975 register vm_page_t m
;
3976 unsigned int page_count
;
3977 vm_offset_t prev_addr
;
3979 prev_addr
= pages
->phys_page
;
3981 for (m
= NEXT_PAGE(pages
); m
!= VM_PAGE_NULL
; m
= NEXT_PAGE(m
)) {
3982 if (m
->phys_page
!= prev_addr
+ 1) {
3983 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3984 m
, (long)prev_addr
, m
->phys_page
);
3985 printf("pages %p page_count %d npages %d\n", pages
, page_count
, npages
);
3986 panic("vm_page_verify_contiguous: not contiguous!");
3988 prev_addr
= m
->phys_page
;
3991 if (page_count
!= npages
) {
3992 printf("pages %p actual count 0x%x but requested 0x%x\n",
3993 pages
, page_count
, npages
);
3994 panic("vm_page_verify_contiguous: count error");
/*
 *	Check the free lists for proper length etc.
 */
4003 static boolean_t vm_page_verify_this_free_list_enabled
= FALSE
;
4005 vm_page_verify_free_list(
4006 queue_head_t
*vm_page_queue
,
4008 vm_page_t look_for_page
,
4009 boolean_t expect_page
)
4011 unsigned int npages
;
4014 boolean_t found_page
;
4016 if (! vm_page_verify_this_free_list_enabled
)
4021 prev_m
= (vm_page_t
) vm_page_queue
;
4022 queue_iterate(vm_page_queue
,
4027 if (m
== look_for_page
) {
4030 if ((vm_page_t
) m
->pageq
.prev
!= prev_m
)
4031 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
4032 color
, npages
, m
, m
->pageq
.prev
, prev_m
);
4034 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
4036 if (color
!= (unsigned int) -1) {
4037 if ((m
->phys_page
& vm_color_mask
) != color
)
4038 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
4039 color
, npages
, m
, m
->phys_page
& vm_color_mask
, color
);
4041 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
4047 if (look_for_page
!= VM_PAGE_NULL
) {
4048 unsigned int other_color
;
4050 if (expect_page
&& !found_page
) {
4051 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
4052 color
, npages
, look_for_page
, look_for_page
->phys_page
);
4053 _vm_page_print(look_for_page
);
4054 for (other_color
= 0;
4055 other_color
< vm_colors
;
4057 if (other_color
== color
)
4059 vm_page_verify_free_list(&vm_page_queue_free
[other_color
],
4060 other_color
, look_for_page
, FALSE
);
4062 if (color
== (unsigned int) -1) {
4063 vm_page_verify_free_list(&vm_lopage_queue_free
,
4064 (unsigned int) -1, look_for_page
, FALSE
);
4066 panic("vm_page_verify_free_list(color=%u)\n", color
);
4068 if (!expect_page
&& found_page
) {
4069 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
4070 color
, npages
, look_for_page
, look_for_page
->phys_page
);
4076 static boolean_t vm_page_verify_all_free_lists_enabled
= FALSE
;
4078 vm_page_verify_free_lists( void )
4080 unsigned int color
, npages
, nlopages
;
4081 boolean_t toggle
= TRUE
;
4083 if (! vm_page_verify_all_free_lists_enabled
)
4088 lck_mtx_lock(&vm_page_queue_free_lock
);
4090 if (vm_page_verify_this_free_list_enabled
== TRUE
) {
4092 * This variable has been set globally for extra checking of
4093 * each free list Q. Since we didn't set it, we don't own it
4094 * and we shouldn't toggle it.
4099 if (toggle
== TRUE
) {
4100 vm_page_verify_this_free_list_enabled
= TRUE
;
4103 for( color
= 0; color
< vm_colors
; color
++ ) {
4104 npages
+= vm_page_verify_free_list(&vm_page_queue_free
[color
],
4105 color
, VM_PAGE_NULL
, FALSE
);
4107 nlopages
= vm_page_verify_free_list(&vm_lopage_queue_free
,
4109 VM_PAGE_NULL
, FALSE
);
4110 if (npages
!= vm_page_free_count
|| nlopages
!= vm_lopage_free_count
)
4111 panic("vm_page_verify_free_lists: "
4112 "npages %u free_count %d nlopages %u lo_free_count %u",
4113 npages
, vm_page_free_count
, nlopages
, vm_lopage_free_count
);
4115 if (toggle
== TRUE
) {
4116 vm_page_verify_this_free_list_enabled
= FALSE
;
4119 lck_mtx_unlock(&vm_page_queue_free_lock
);
4123 vm_page_queues_assert(
4128 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
4130 if (mem
->free
+ mem
->active
+ mem
->inactive
+ mem
->speculative
+
4131 mem
->throttled
+ mem
->pageout_queue
> (val
)) {
4132 _vm_page_print(mem
);
4133 panic("vm_page_queues_assert(%p, %d)\n", mem
, val
);
4135 if (VM_PAGE_WIRED(mem
)) {
4136 assert(!mem
->active
);
4137 assert(!mem
->inactive
);
4138 assert(!mem
->speculative
);
4139 assert(!mem
->throttled
);
4140 assert(!mem
->pageout_queue
);
4143 #endif /* MACH_ASSERT */
extern boolean_t (* volatile consider_buffer_cache_collect)(int);
/*
 *	CONTIGUOUS PAGE ALLOCATION
 *
 *	Find a region large enough to contain at least n pages
 *	of contiguous physical memory.
 *
 *	This is done by traversing the vm_page_t array in a linear fashion
 *	we assume that the vm_page_t array has the available physical pages in an
 *	ordered, ascending list... this is currently true of all our implementations
 *	and must remain so... there can be 'holes' in the array... we also can
 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
 *	which used to happen via 'vm_page_convert'... that function was no longer
 *	being called and was removed...
 *
 *	The basic flow consists of stabilizing some of the interesting state of
 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 *	sweep at the beginning of the array looking for pages that meet our criteria
 *	for a 'stealable' page... currently we are pretty conservative... if the page
 *	meets this criteria and is physically contiguous to the previous page in the 'run'
 *	we keep developing it.  If we hit a page that doesn't fit, we reset our state
 *	and start to develop a new run... if at this point we've already considered
 *	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 *	and mutex_pause (which will yield the processor), to keep the latency low w/r
 *	to other threads trying to acquire free pages (or move pages from q to q),
 *	and then continue from the spot we left off... we only make 1 pass through the
 *	array.  Once we have a 'run' that is long enough, we'll go into the loop which
 *	steals the pages from the queues they're currently on... pages on the free
 *	queue can be stolen directly... pages that are on any of the other queues
 *	must be removed from the object they are tabled on... this requires taking the
 *	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
 *	dump the pages we've currently stolen back to the free list, and pick up our
 *	scan from the point where we aborted the 'current' run.
 *
 *	Requirements:
 *		- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 */

#define MAX_CONSIDERED_BEFORE_YIELD	1000
#define RESET_STATE_OF_RUN()	\
	MACRO_BEGIN		\
	prevcontaddr = -2;	\
	start_pnum = -1;	\
	free_considered = 0;	\
	substitute_needed = 0;	\
	npages = 0;		\
	MACRO_END
/*
 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages ?
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
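
/*
 * Illustrative sketch (not part of the original source): the run-building
 * scan described in the block comment above -- walk an array that is ordered
 * by physical address, extend the current run while pages remain physically
 * contiguous and "stealable", and reset the run otherwise.  page_t,
 * page_array, page_count, is_stealable() and phys() are hypothetical
 * placeholder names.  Kept under #if 0 so it is never compiled.
 */
#if 0
static int
find_contiguous_run(unsigned int want, unsigned int *run_start)
{
    unsigned int    idx, npages = 0, start = 0;
    ppnum_t         prevcontaddr = (ppnum_t) -2;

    for (idx = 0; idx < page_count && npages < want; idx++) {
        page_t  *p = &page_array[idx];

        if (!is_stealable(p)) {
            npages = 0;                     /* page breaks the run */
        } else if (phys(p) != prevcontaddr + 1) {
            start = idx;                    /* not contiguous: start a new run */
            npages = 1;
        } else {
            if (npages == 0)
                start = idx;
            npages++;                       /* extend the current run */
        }
        prevcontaddr = phys(p);
    }
    if (npages < want)
        return 0;                           /* no run long enough */
    *run_start = start;
    return 1;
}
#endif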
static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;

int vm_page_find_contig_debug = 0;
4218 vm_page_find_contiguous(
4219 unsigned int contig_pages
,
4226 ppnum_t prevcontaddr
;
4228 unsigned int npages
, considered
, scanned
;
4229 unsigned int page_idx
, start_idx
, last_idx
, orig_last_idx
;
4230 unsigned int idx_last_contig_page_found
= 0;
4231 int free_considered
, free_available
;
4232 int substitute_needed
;
4233 boolean_t wrapped
, zone_gc_called
= FALSE
;
4235 clock_sec_t tv_start_sec
, tv_end_sec
;
4236 clock_usec_t tv_start_usec
, tv_end_usec
;
4241 int stolen_pages
= 0;
4242 int compressed_pages
= 0;
4245 if (contig_pages
== 0)
4246 return VM_PAGE_NULL
;
4251 vm_page_verify_free_lists();
4254 clock_get_system_microtime(&tv_start_sec
, &tv_start_usec
);
4256 PAGE_REPLACEMENT_ALLOWED(TRUE
);
4258 vm_page_lock_queues();
4261 lck_mtx_lock(&vm_page_queue_free_lock
);
4263 RESET_STATE_OF_RUN();
4267 free_available
= vm_page_free_count
- vm_page_free_reserved
;
4271 if(flags
& KMA_LOMEM
)
4272 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
;
        idx_last_contig_page_found = vm_page_find_contiguous_last_idx;

    orig_last_idx = idx_last_contig_page_found;
    last_idx = orig_last_idx;

    for (page_idx = last_idx, start_idx = last_idx;
         npages < contig_pages && page_idx < vm_pages_count;
         page_idx++) {
retry:
        if (wrapped && npages == 0 &&
            page_idx >= orig_last_idx) {
            /*
             * We're back where we started and we haven't
             * found any suitable contiguous range.  Let's
             * give up.
             */
            break;
        }
        m = &vm_pages[page_idx];

        assert(!m->fictitious);
        assert(!m->private);

        if (max_pnum && m->phys_page > max_pnum) {
            /* no more low pages... */
            break;
        }
        if (!npages & ((m->phys_page & pnum_mask) != 0)) {
            /*
             * not aligned
             */
            RESET_STATE_OF_RUN();

        } else if (VM_PAGE_WIRED(m) || m->gobbled ||
                   m->encrypted_cleaning ||
                   m->pageout_queue || m->laundry || m->wanted ||
                   m->cleaning || m->overwriting || m->pageout) {
            /*
             * page is in a transient state
             * or a state we don't want to deal
             * with, so don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else if (!m->free && !m->active && !m->inactive &&
                   !m->speculative && !m->throttled && !m->compressor) {
            /*
             * page needs to be on one of our queues
             * or it needs to belong to the compressor pool
             * in order for it to be stable behind the
             * locks we hold at this point...
             * if not, don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else if (!m->free && (!m->tabled || m->busy)) {
            /*
             * pages on the free list are always 'busy'
             * so we couldn't test for 'busy' in the check
             * for the transient states... pages that are
             * 'free' are never 'tabled', so we also couldn't
             * test for 'tabled'.  So we check here to make
             * sure that a non-free page is not busy and is
             * tabled on an object...
             * if not, don't consider it which
             * means starting a new run
             */
            RESET_STATE_OF_RUN();

        } else {
            if (m->phys_page != prevcontaddr + 1) {
                if ((m->phys_page & pnum_mask) != 0) {
                    RESET_STATE_OF_RUN();
                } else {
                    npages = 1;
                    start_idx = page_idx;
                    start_pnum = m->phys_page;
                }
            } else {
                npages++;
            }
            prevcontaddr = m->phys_page;

            if (!m->free) {
                /*
                 * This page is not free.
                 * If we can't steal used pages,
                 * we have to give up this run
                 * and keep looking.
                 *
                 * Otherwise, we might need to
                 * move the contents of this page
                 * into a substitute page.
                 */
#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
                if (m->pmapped || m->dirty || m->precious) {
                    substitute_needed++;
                }
#else
                RESET_STATE_OF_RUN();
#endif
            }

            if ((free_considered + substitute_needed) > free_available) {
                /*
                 * if we let this run continue
                 * we will end up dropping the vm_page_free_count
                 * below the reserve limit... we need to abort
                 * this run, but we can at least re-consider this
                 * page... thus the jump back to 'retry'
                 */
                RESET_STATE_OF_RUN();

                if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
                    considered++;
                    goto retry;
                }
                /*
                 * free_available == 0
                 * so can't consider any free pages... if
                 * we went to retry in this case, we'd
                 * get stuck looking at the same page
                 * w/o making any forward progress
                 * we also want to take this path if we've already
                 * reached our limit that controls the lock latency
                 */
            }
        }
        scanned++;

        if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {

            PAGE_REPLACEMENT_ALLOWED(FALSE);

            lck_mtx_unlock(&vm_page_queue_free_lock);
            vm_page_unlock_queues();

            mutex_pause(0);

            PAGE_REPLACEMENT_ALLOWED(TRUE);

            vm_page_lock_queues();
            lck_mtx_lock(&vm_page_queue_free_lock);

            RESET_STATE_OF_RUN();
            /*
             * reset our free page limit since we
             * dropped the lock protecting the vm_page_free_queue
             */
            free_available = vm_page_free_count - vm_page_free_reserved;
            considered = 0;

            yielded++;
        }
        considered++;
    }
    m = VM_PAGE_NULL;

    if (npages != contig_pages) {
        if (!wrapped) {
            /*
             * We didn't find a contiguous range but we didn't
             * start from the very first page.
             * Start again from the very first page.
             */
            RESET_STATE_OF_RUN();
            if (flags & KMA_LOMEM)
                idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
            else
                idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
            last_idx = 0;
            page_idx = last_idx;
            wrapped = TRUE;
            goto retry;
        }
        lck_mtx_unlock(&vm_page_queue_free_lock);
    } else {
        unsigned int    cur_idx;
        unsigned int    tmp_start_idx;
        vm_object_t     locked_object = VM_OBJECT_NULL;
        boolean_t       abort_run = FALSE;

        assert(page_idx - start_idx == contig_pages);

        tmp_start_idx = start_idx;

        /*
         * first pass through to pull the free pages
         * off of the free queue so that in case we
         * need substitute pages, we won't grab any
         * of the free pages in the run... we'll clear
         * the 'free' bit in the 2nd pass, and even in
         * an abort_run case, we'll collect all of the
         * free pages in this run and return them to the free list
         */
        while (start_idx < page_idx) {

            m1 = &vm_pages[start_idx++];

#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
            assert(m1->free);
#endif
            if (m1->free) {
                unsigned int color;

                color = m1->phys_page & vm_color_mask;

                vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);

                queue_remove(&vm_page_queue_free[color],
                             m1,
                             vm_page_t,
                             pageq);
                m1->pageq.next = NULL;
                m1->pageq.prev = NULL;

                vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);

                /*
                 * Clear the "free" bit so that this page
                 * does not get considered for another
                 * concurrent physically-contiguous allocation.
                 */
                m1->free = FALSE;

                vm_page_free_count--;
            }
        }
        if (flags & KMA_LOMEM)
            vm_page_lomem_find_contiguous_last_idx = page_idx;
        else
            vm_page_find_contiguous_last_idx = page_idx;

        /*
         * we can drop the free queue lock at this point since
         * we've pulled any 'free' candidates off of the list
         * we need it dropped so that we can do a vm_page_grab
         * when substituting for pmapped/dirty pages
         */
        lck_mtx_unlock(&vm_page_queue_free_lock);

        start_idx = tmp_start_idx;
        cur_idx = page_idx - 1;

        while (start_idx++ < page_idx) {
            /*
             * must go through the list from back to front
             * so that the page list is created in the
             * correct order - low -> high phys addresses
             */
            m1 = &vm_pages[cur_idx--];

            if (m1->object == VM_OBJECT_NULL) {
                /*
                 * page has already been removed from
                 * the free list in the 1st pass
                 */
                assert(m1->offset == (vm_object_offset_t) -1);
                assert(m1->busy);
                assert(!m1->wanted);
                assert(!m1->laundry);
            } else {
                vm_object_t object;
                int         refmod;
                boolean_t   disconnected, reusable;

                if (abort_run == TRUE)
                    continue;

                object = m1->object;

                if (object != locked_object) {
                    if (locked_object) {
                        vm_object_unlock(locked_object);
                        locked_object = VM_OBJECT_NULL;
                    }
                    if (vm_object_lock_try(object))
                        locked_object = object;
                }
                if (locked_object == VM_OBJECT_NULL ||
                    (VM_PAGE_WIRED(m1) || m1->gobbled ||
                     m1->encrypted_cleaning ||
                     m1->pageout_queue || m1->laundry || m1->wanted ||
                     m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {

                    if (locked_object) {
                        vm_object_unlock(locked_object);
                        locked_object = VM_OBJECT_NULL;
                    }
                    tmp_start_idx = cur_idx;
                    abort_run = TRUE;
                    continue;
                }

                disconnected = FALSE;
                reusable = FALSE;

                if ((m1->reusable ||
                     m1->object->all_reusable) &&
                    m1->inactive &&
                    !m1->dirty &&
                    !m1->reference) {
                    /* reusable page... */
                    refmod = pmap_disconnect(m1->phys_page);
                    disconnected = TRUE;
                    if (refmod == 0) {
                        /*
                         * ... not reused: can steal
                         * without relocating contents.
                         */
                        reusable = TRUE;
                    }
                }

                if ((m1->pmapped && !reusable) ||
                    m1->dirty ||
                    m1->precious) {
                    vm_object_offset_t offset;

                    m2 = vm_page_grab();

                    if (m2 == VM_PAGE_NULL) {
                        if (locked_object) {
                            vm_object_unlock(locked_object);
                            locked_object = VM_OBJECT_NULL;
                        }
                        tmp_start_idx = cur_idx;
                        abort_run = TRUE;
                        continue;
                    }
                    if (!disconnected) {
                        if (m1->pmapped)
                            refmod = pmap_disconnect(m1->phys_page);
                        else
                            refmod = 0;
                    }

                    /* copy the page's contents */
                    pmap_copy_page(m1->phys_page, m2->phys_page);
                    /* copy the page's state */
                    assert(!VM_PAGE_WIRED(m1));
                    assert(!m1->pageout_queue);
                    assert(!m1->laundry);
                    m2->reference    = m1->reference;
                    assert(!m1->gobbled);
                    assert(!m1->private);
                    m2->no_cache     = m1->no_cache;
                    assert(!m1->wanted);
                    assert(!m1->fictitious);
                    m2->pmapped      = m1->pmapped; /* should flush cache ? */
                    m2->wpmapped     = m1->wpmapped;
                    assert(!m1->pageout);
                    m2->absent       = m1->absent;
                    m2->error        = m1->error;
                    m2->dirty        = m1->dirty;
                    assert(!m1->cleaning);
                    m2->precious     = m1->precious;
                    m2->clustered    = m1->clustered;
                    assert(!m1->overwriting);
                    m2->restart      = m1->restart;
                    m2->unusual      = m1->unusual;
                    m2->encrypted    = m1->encrypted;
                    assert(!m1->encrypted_cleaning);
                    m2->cs_validated = m1->cs_validated;
                    m2->cs_tainted   = m1->cs_tainted;
                    m2->cs_nx        = m1->cs_nx;

                    /*
                     * If m1 had really been reusable,
                     * we would have just stolen it, so
                     * let's not propagate its "reusable"
                     * bit and assert that m2 is not
                     * marked as "reusable".
                     */
                    // m2->reusable  = m1->reusable;
                    assert(!m2->reusable);

                    assert(!m1->lopage);
                    m2->slid         = m1->slid;
                    m2->compressor   = m1->compressor;

                    /*
                     * page may need to be flushed if
                     * it is marshalled into a UPL
                     * that is going to be used by a device
                     * that doesn't support coherency
                     */
                    m2->written_by_kernel = TRUE;

                    /*
                     * make sure we clear the ref/mod state
                     * from the pmap layer... else we risk
                     * inheriting state from the last time
                     * this page was used...
                     */
                    pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);

                    if (refmod & VM_MEM_REFERENCED)
                        m2->reference = TRUE;
                    if (refmod & VM_MEM_MODIFIED) {
                        SET_PAGE_DIRTY(m2, TRUE);
                    }
                    offset = m1->offset;

                    /*
                     * completely cleans up the state
                     * of the page so that it is ready
                     * to be put onto the free list, or
                     * for this purpose it looks like it
                     * just came off of the free list
                     */
                    vm_page_free_prepare(m1);

                    /*
                     * now put the substitute page
                     * on the object
                     */
                    vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);

                    if (m2->compressor) {
                        m2->pmapped = TRUE;
                        m2->wpmapped = TRUE;

                        PMAP_ENTER(kernel_pmap, m2->offset, m2,
                                   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);

                        compressed_pages++;
                    } else {
                        if (m2->reference)
                            vm_page_activate(m2);
                        else
                            vm_page_deactivate(m2);

                        PAGE_WAKEUP_DONE(m2);
                    }
                } else {
                    assert(!m1->compressor);

                    /*
                     * completely cleans up the state
                     * of the page so that it is ready
                     * to be put onto the free list, or
                     * for this purpose it looks like it
                     * just came off of the free list
                     */
                    vm_page_free_prepare(m1);
                }

                stolen_pages++;
            }
            m1->pageq.next = (queue_entry_t) m;
            m1->pageq.prev = NULL;
            m = m1;
        }
        if (locked_object) {
            vm_object_unlock(locked_object);
            locked_object = VM_OBJECT_NULL;
        }

        if (abort_run == TRUE) {
            if (m != VM_PAGE_NULL) {
                vm_page_free_list(m, FALSE);
            }

            dumped_run++;

            /*
             * want the index of the last
             * page in this run that was
             * successfully 'stolen', so back
             * it up 1 for the auto-decrement on use
             * and 1 more to bump back over this page
             */
            page_idx = tmp_start_idx + 2;
            if (page_idx >= vm_pages_count) {
                if (wrapped)
                    goto done_scanning;
                page_idx = last_idx = 0;
                wrapped = TRUE;
            }
            abort_run = FALSE;

            /*
             * We didn't find a contiguous range but we didn't
             * start from the very first page.
             * Start again from the very first page.
             */
            RESET_STATE_OF_RUN();

            if (flags & KMA_LOMEM)
                idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
            else
                idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;

            last_idx = page_idx;

            lck_mtx_lock(&vm_page_queue_free_lock);
            /*
             * reset our free page limit since we
             * dropped the lock protecting the vm_page_free_queue
             */
            free_available = vm_page_free_count - vm_page_free_reserved;

            goto retry;
        }
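
        /*
         * At this point 'm' heads a pageq-linked list of 'npages'
         * physically contiguous pages, built back-to-front above so the
         * list runs from low to high physical address.  Wire or gobble
         * them below before returning the run to the caller.
         */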
        for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
            if (wire == TRUE)
                m1->wire_count++;
            else
                m1->gobbled = TRUE;
        }
        if (wire == FALSE)
            vm_page_gobble_count += npages;

        /*
         * gobbled pages are also counted as wired pages
         */
        vm_page_wire_count += npages;

        assert(vm_page_verify_contiguous(m, npages));
    }
done_scanning:
    PAGE_REPLACEMENT_ALLOWED(FALSE);

    vm_page_unlock_queues();

    clock_get_system_microtime(&tv_end_sec, &tv_end_usec);

    tv_end_sec -= tv_start_sec;
    if (tv_end_usec < tv_start_usec) {
        tv_end_sec--;
        tv_end_usec += 1000000;
    }
    tv_end_usec -= tv_start_usec;
    if (tv_end_usec >= 1000000) {
        tv_end_sec++;
        tv_end_usec -= 1000000;
    }
    if (vm_page_find_contig_debug) {
        printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
               __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
               (long)tv_end_sec, tv_end_usec, orig_last_idx,
               scanned, yielded, dumped_run, stolen_pages, compressed_pages);
    }

    vm_page_verify_free_lists();

    if (m == NULL && zone_gc_called == FALSE) {
        printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
               __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
               scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);

        if (consider_buffer_cache_collect != NULL) {
            (void)(*consider_buffer_cache_collect)(1);
        }
        consider_zone_gc(TRUE);

        zone_gc_called = TRUE;

        printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
        goto full_scan_again;
    }

    return m;
}
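
/*
 * vm_page_find_contiguous() scans the vm_pages[] array for a run of
 * 'contig_pages' physically contiguous frames (optionally below 'max_pnum'
 * and aligned per 'pnum_mask'), stealing in-use pages and substituting
 * fresh ones where VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL permits.  The scan
 * resumes from the index recorded by the previous call
 * (vm_page_find_contiguous_last_idx / vm_page_lomem_find_contiguous_last_idx)
 * so repeated callers don't rescan the same low-numbered frames each time.
 */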
/*
 *	Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
	vm_size_t	size,
	vm_page_t	*list,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t	pages;
	unsigned int	npages;

	if (size % PAGE_SIZE != 0)
		return KERN_INVALID_ARGUMENT;

	npages = (unsigned int) (size / PAGE_SIZE);
	if (npages != size / PAGE_SIZE) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Obtain a pointer to a subset of the free
	 *	list large enough to satisfy the request;
	 *	the region will be physically contiguous.
	 */
	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

	if (pages == VM_PAGE_NULL)
		return KERN_NO_SPACE;
	/*
	 * determine need for wakeups
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

	/*
	 *	The CPM pages should now be available and
	 *	ordered by ascending physical address.
	 */
	assert(vm_page_verify_contiguous(pages, npages));

	*list = pages;

	return KERN_SUCCESS;
}
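
/*
 * Illustrative sketch (not from the original source): a caller that needs
 * one megabyte of wired, physically contiguous memory below 4GB could do
 * something along the lines of
 *
 *	vm_page_t	first_page;
 *	kern_return_t	kr;
 *
 *	kr = cpm_allocate(1024 * 1024, &first_page, (ppnum_t)0xFFFFF, 0,
 *			  TRUE, KMA_LOMEM);
 *
 * where the max_pnum / pnum_mask values shown are only examples and depend
 * on the device's DMA constraints.  On success the pages come back as a
 * pageq-linked list in ascending physical order, as asserted above.
 */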
unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;

/*
 * when working on a 'run' of pages, it is necessary to hold
 * the vm_page_queue_lock (a hot global lock) for certain operations
 * on the page... however, the majority of the work can be done
 * while merely holding the object lock... in fact there are certain
 * collections of pages that don't require any work brokered by the
 * vm_page_queue_lock... to mitigate the time spent behind the global
 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
 * while doing all of the work that doesn't require the vm_page_queue_lock...
 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
 * necessary work for each page... we will grab the busy bit on the page
 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
 * if it can't immediately take the vm_page_queue_lock in order to compete
 * for the locks in the same order that vm_pageout_scan takes them.
 * the operation names are modeled after the names of the routines that
 * need to be called in order to make the changes very obvious in the
 * original loop in which the actions were taken
 */

void
vm_page_do_delayed_work(
	vm_object_t	object,
	vm_tag_t	tag,
	struct vm_page_delayed_work *dwp,
	int		dw_count)
{
	int		j;
	vm_page_t	m;
	vm_page_t	local_free_q = VM_PAGE_NULL;

	/*
	 * pageout_scan takes the vm_page_lock_queues first
	 * then tries for the object lock... to avoid what
	 * is effectively a lock inversion, we'll go to the
	 * trouble of taking them in that same order... otherwise
	 * if this object contains the majority of the pages resident
	 * in the UBC (or a small set of large objects actively being
	 * worked on contain the majority of the pages), we could
	 * cause the pageout_scan thread to 'starve' in its attempt
	 * to find pages to move to the free queue, since it has to
	 * successfully acquire the object lock of any candidate page
	 * before it can steal/clean it.
	 */
	if (!vm_page_trylockspin_queues()) {
		vm_object_unlock(object);

		vm_page_lockspin_queues();

		for (j = 0; ; j++) {
			if (!vm_object_lock_avoid(object) &&
			    _vm_object_lock_try(object))
				break;
			vm_page_unlock_queues();
			mutex_pause(j);
			vm_page_lockspin_queues();
		}
	}
	for (j = 0; j < dw_count; j++, dwp++) {

		m = dwp->dw_m;

		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
			vm_pageout_throttle_up(m);
#if CONFIG_PHANTOM_CACHE
		if (dwp->dw_mask & DW_vm_phantom_cache_update)
			vm_phantom_cache_update(m);
#endif
		if (dwp->dw_mask & DW_vm_page_wire)
			vm_page_wire(m, tag, FALSE);
		else if (dwp->dw_mask & DW_vm_page_unwire) {
			boolean_t	queueit;

			queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;

			vm_page_unwire(m, queueit);
		}
		if (dwp->dw_mask & DW_vm_page_free) {
			vm_page_free_prepare_queues(m);

			assert(m->pageq.next == NULL && m->pageq.prev == NULL);
			/*
			 * Add this page to our list of reclaimed pages,
			 * to be freed later.
			 */
			m->pageq.next = (queue_entry_t) local_free_q;
			local_free_q = m;
		} else {
			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
				vm_page_deactivate_internal(m, FALSE);
			else if (dwp->dw_mask & DW_vm_page_activate) {
				if (m->active == FALSE) {
					vm_page_activate(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_speculate)
				vm_page_speculate(m, TRUE);
			else if (dwp->dw_mask & DW_enqueue_cleaned) {
				/*
				 * if we didn't hold the object lock and did this,
				 * we might disconnect the page, then someone might
				 * soft fault it back in, then we would put it on the
				 * cleaned queue, and so we would have a referenced (maybe even dirty)
				 * page on that queue, which we don't want
				 */
				int refmod_state = pmap_disconnect(m->phys_page);

				if ((refmod_state & VM_MEM_REFERENCED)) {
					/*
					 * this page has been touched since it got cleaned; let's activate it
					 * if it hasn't already been
					 */
					vm_pageout_enqueued_cleaned++;
					vm_pageout_cleaned_reactivated++;
					vm_pageout_cleaned_commit_reactivated++;

					if (m->active == FALSE)
						vm_page_activate(m);
				} else {
					m->reference = FALSE;
					vm_page_enqueue_cleaned(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_lru)
				vm_page_lru(m);
			else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
				if ( !m->pageout_queue)
					vm_page_queues_remove(m);
			}
			if (dwp->dw_mask & DW_set_reference)
				m->reference = TRUE;
			else if (dwp->dw_mask & DW_clear_reference)
				m->reference = FALSE;

			if (dwp->dw_mask & DW_move_page) {
				if ( !m->pageout_queue) {
					vm_page_queues_remove(m);

					assert(m->object != kernel_object);

					vm_page_enqueue_inactive(m, FALSE);
				}
			}
			if (dwp->dw_mask & DW_clear_busy)
				m->busy = FALSE;

			if (dwp->dw_mask & DW_PAGE_WAKEUP)
				PAGE_WAKEUP(m);
		}
	}
	vm_page_unlock_queues();

	if (local_free_q)
		vm_page_free_list(local_free_q, TRUE);

	VM_CHECK_MEMORYSTATUS;
}
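
/*
 * Each dw_mask may combine several DW_* operations for one page;
 * DW_clear_busy and DW_PAGE_WAKEUP are applied last in the loop above so
 * that a page the collection pass marked 'busy' is released only after all
 * of its queue work has been completed.
 */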
kern_return_t
vm_page_alloc_list(
	int		page_count,
	int		flags,
	vm_page_t	*list)
{
	vm_page_t	lo_page_list = VM_PAGE_NULL;
	vm_page_t	mem;
	int		i;

	if ( !(flags & KMA_LOMEM))
		panic("vm_page_alloc_list: called w/o KMA_LOMEM");

	for (i = 0; i < page_count; i++) {

		mem = vm_page_grablo();

		if (mem == VM_PAGE_NULL) {
			if (lo_page_list)
				vm_page_free_list(lo_page_list, FALSE);

			*list = VM_PAGE_NULL;

			return (KERN_RESOURCE_SHORTAGE);
		}
		mem->pageq.next = (queue_entry_t) lo_page_list;
		lo_page_list = mem;
	}
	*list = lo_page_list;

	return (KERN_SUCCESS);
}

void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
	page->offset = offset;
}

vm_page_t
vm_page_get_next(vm_page_t page)
{
	return ((vm_page_t) page->pageq.next);
}

vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
	return (page->offset);
}

ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
	return (page->phys_page);
}
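
/*
 * The vm_page_set_offset / vm_page_get_* routines above are trivial
 * accessors for callers (e.g. code outside the VM layer working with page
 * lists) that treat vm_page_t as an opaque handle and must not touch its
 * fields directly.
 */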
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */


#if HIBERNATION

static vm_page_t hibernate_gobble_queue;

static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
static int  hibernate_flush_dirty_pages(int);
static int  hibernate_flush_queue(queue_head_t *, int);

void hibernate_flush_wait(void);
void hibernate_mark_in_progress(void);
void hibernate_clear_in_progress(void);

void		hibernate_free_range(int, int);
void		hibernate_hash_insert_page(vm_page_t);
uint32_t	hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
void		hibernate_rebuild_vm_structs(void);
uint32_t	hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
ppnum_t		hibernate_lookup_paddr(unsigned int);

struct hibernate_statistics {
	int hibernate_considered;
	int hibernate_reentered_on_q;
	int hibernate_found_dirty;
	int hibernate_skipped_cleaning;
	int hibernate_skipped_transient;
	int hibernate_skipped_precious;
	int hibernate_skipped_external;
	int hibernate_queue_nolock;
	int hibernate_queue_paused;
	int hibernate_throttled;
	int hibernate_throttle_timeout;
	int hibernate_drained;
	int hibernate_drain_timeout;
	int cd_lock_failed;
	int cd_found_wired;
	int cd_found_precious;
	int cd_found_busy;
	int cd_found_unusual;
	int cd_found_cleaning;
	int cd_found_laundry;
	int cd_found_dirty;
	int cd_found_xpmapped;
	int cd_skipped_xpmapped;
	int cd_local_free;
	int cd_total_free;
	int cd_vm_page_wire_count;
	int cd_vm_struct_pages_unneeded;
	int cd_pages;
	int cd_discarded;
	int cd_count_wire;
} hibernate_stats;


/*
 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
 * so that we don't overrun the estimated image size, which would
 * result in a hibernation failure.
 */
#define HIBERNATE_XPMAPPED_LIMIT	40000
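/*
 * With 4KB pages this limit corresponds to roughly 156MB of extra image
 * payload (40000 * 4096 bytes).
 */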

static int
hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
{
	wait_result_t	wait_result;

	vm_page_lock_queues();

	while ( !queue_empty(&q->pgo_pending) ) {

		q->pgo_draining = TRUE;

		assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);

		vm_page_unlock_queues();

		wait_result = thread_block(THREAD_CONTINUE_NULL);

		if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
			hibernate_stats.hibernate_drain_timeout++;

			if (q == &vm_pageout_queue_external)
				return (0);

			return (1);
		}
		vm_page_lock_queues();

		hibernate_stats.hibernate_drained++;
	}
	vm_page_unlock_queues();

	return (0);
}


boolean_t hibernate_skip_external = FALSE;
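
/*
 * A non-zero return from hibernate_drain_pageout_queue() means the queue
 * could not be drained before the timeout; the flush path treats that as a
 * reason to abort.  hibernate_skip_external is latched when the external
 * pageout queue stays throttled, so further external (file-backed) pages
 * are skipped rather than stalled on.
 */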

static int
hibernate_flush_queue(queue_head_t *q, int qcount)
{
	vm_page_t	m;
	vm_object_t	l_object = NULL;
	vm_object_t	m_object = NULL;
	int		refmod_state = 0;
	int		try_failed_count = 0;
	int		retval = 0;
	int		current_run = 0;
	struct vm_pageout_queue *iq;
	struct vm_pageout_queue *eq;
	struct vm_pageout_queue *tq;


	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);

	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;

	vm_page_lock_queues();

	while (qcount && !queue_empty(q)) {

		if (current_run++ == 1000) {
			if (hibernate_should_abort()) {
				retval = 1;
				break;
			}
			current_run = 0;
		}

		m = (vm_page_t) queue_first(q);
		m_object = m->object;

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			/*
			 * Try to lock object; since we've already got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if ( !vm_object_lock_try_scan(m_object)) {

				if (try_failed_count > 20) {
					hibernate_stats.hibernate_queue_nolock++;

					goto reenter_pg_on_q;
				}
				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();

				hibernate_stats.hibernate_queue_paused++;
				continue;
			}
			l_object = m_object;
		}
		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->cleaning)
				hibernate_stats.hibernate_skipped_cleaning++;
			else
				hibernate_stats.hibernate_skipped_transient++;

			goto reenter_pg_on_q;
		}
		if (m_object->copy == VM_OBJECT_NULL) {
			if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
				/*
				 * let the normal hibernate image path
				 * deal with these
				 */
				goto reenter_pg_on_q;
			}
		}
		if ( !m->dirty && m->pmapped) {
			refmod_state = pmap_get_refmod(m->phys_page);

			if ((refmod_state & VM_MEM_MODIFIED)) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		} else
			refmod_state = 0;

		if ( !m->dirty) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->precious)
				hibernate_stats.hibernate_skipped_precious++;

			goto reenter_pg_on_q;
		}

		if (hibernate_skip_external == TRUE && !m_object->internal) {

			hibernate_stats.hibernate_skipped_external++;

			goto reenter_pg_on_q;
		}
		tq = NULL;

		if (m_object->internal) {
			if (VM_PAGE_Q_THROTTLED(iq))
				tq = iq;
		} else if (VM_PAGE_Q_THROTTLED(eq))
			tq = eq;

		if (tq != NULL) {
			wait_result_t	wait_result;
			int		wait_count = 5;

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}

			while (retval == 0) {

				tq->pgo_throttled = TRUE;

				assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);

				vm_page_unlock_queues();

				wait_result = thread_block(THREAD_CONTINUE_NULL);

				vm_page_lock_queues();

				if (wait_result != THREAD_TIMED_OUT)
					break;
				if (!VM_PAGE_Q_THROTTLED(tq))
					break;

				if (hibernate_should_abort())
					retval = 1;

				if (--wait_count == 0) {

					hibernate_stats.hibernate_throttle_timeout++;

					if (tq == eq) {
						hibernate_skip_external = TRUE;
						break;
					}
					retval = 1;
				}
			}
			if (retval)
				break;

			hibernate_stats.hibernate_throttled++;

			continue;
		}
		/*
		 * we've already factored out pages in the laundry which
		 * means this page can't be on the pageout queue so it's
		 * safe to do the vm_page_queues_remove
		 */
		assert(!m->pageout_queue);

		vm_page_queues_remove(m);

		if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
			pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);

		(void)vm_pageout_cluster(m, FALSE, FALSE, FALSE);

		hibernate_stats.hibernate_found_dirty++;

		goto next_pg;

reenter_pg_on_q:
		queue_remove(q, m, vm_page_t, pageq);
		queue_enter(q, m, vm_page_t, pageq);

		hibernate_stats.hibernate_reentered_on_q++;
next_pg:
		hibernate_stats.hibernate_considered++;

		qcount--;
		try_failed_count = 0;
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}

	vm_page_unlock_queues();

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);

	return (retval);
}
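
/*
 * hibernate_flush_queue() examines at most 'qcount' pages from the head of
 * 'q': dirty pages are pushed to the pageout/compressor path via
 * vm_pageout_cluster(), while pages that can't be cleaned right now are
 * re-queued at the tail (queue_remove/queue_enter above) and accounted in
 * hibernate_stats.  A non-zero return asks the caller to abort the flush.
 */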

static int
hibernate_flush_dirty_pages(int pass)
{
	struct vm_speculative_age_q	*aq;
	uint32_t	i;

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, FALSE);
	}

	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
		int		qcount;
		vm_page_t	m;

		aq = &vm_page_queue_speculative[i];

		if (queue_empty(&aq->age_q))
			continue;
		qcount = 0;

		vm_page_lockspin_queues();

		queue_iterate(&aq->age_q,
			      m,
			      vm_page_t,
			      pageq)
		{
			qcount++;
		}
		vm_page_unlock_queues();

		if (qcount) {
			if (hibernate_flush_queue(&aq->age_q, qcount))
				return (1);
		}
	}
	if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
		return (1);
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
		return (1);

	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
		vm_compressor_record_warmup_start();

	if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
		vm_compressor_record_warmup_end();

	if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
		return (1);

	return (0);
}
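
/*
 * 'pass' is only used to bracket the compressor warmup recording
 * (vm_compressor_record_warmup_start/_end) around the first flush pass.
 */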

void
hibernate_reset_stats()
{
	bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
}


int
hibernate_flush_memory()
{
	int	retval;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);

	hibernate_cleaning_in_progress = TRUE;
	hibernate_skip_external = FALSE;

	if ((retval = hibernate_flush_dirty_pages(1)) == 0) {

		if (COMPRESSED_PAGER_IS_ACTIVE) {

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);

			vm_compressor_flush();

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
		}
		if (consider_buffer_cache_collect != NULL) {
			unsigned int orig_wire_count;

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
			orig_wire_count = vm_page_wire_count;

			(void)(*consider_buffer_cache_collect)(1);
			consider_zone_gc(TRUE);

			HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
		}
	}
	hibernate_cleaning_in_progress = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);

	if (retval && COMPRESSED_PAGER_IS_ACTIVE)
		HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);


	HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
		 hibernate_stats.hibernate_considered,
		 hibernate_stats.hibernate_reentered_on_q,
		 hibernate_stats.hibernate_found_dirty);
	HIBPRINT("   skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
		 hibernate_stats.hibernate_skipped_cleaning,
		 hibernate_stats.hibernate_skipped_transient,
		 hibernate_stats.hibernate_skipped_precious,
		 hibernate_stats.hibernate_skipped_external,
		 hibernate_stats.hibernate_queue_nolock);
	HIBPRINT("   queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
		 hibernate_stats.hibernate_queue_paused,
		 hibernate_stats.hibernate_throttled,
		 hibernate_stats.hibernate_throttle_timeout,
		 hibernate_stats.hibernate_drained,
		 hibernate_stats.hibernate_drain_timeout);

	return (retval);
}

void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
	uint32_t		bank;
	hibernate_bitmap_t	*bitmap;

	bitmap = &list->bank_bitmap[0];
	for (bank = 0; bank < list->bank_count; bank++)
	{
		uint32_t last_bit;

		bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
		// set out-of-bound bits at end of bitmap.
		last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
		if (last_bit)
			bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

		bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
	}
}

void
hibernate_free_gobble_pages(void)
{
	vm_page_t	m, next;
	uint32_t	count = 0;

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		next = (vm_page_t) m->pageq.next;
		vm_page_free(m);
		count++;
		m = next;
	}
	hibernate_gobble_queue = VM_PAGE_NULL;

	if (count)
		HIBLOG("Freed %d pages\n", count);
}

static boolean_t
hibernate_consider_discard(vm_page_t m, boolean_t preflight)
{
	vm_object_t	object = NULL;
	int		refmod_state;
	boolean_t	discard = FALSE;

	do
	{
		if (m->private)
			panic("hibernate_consider_discard: private");

		if (!vm_object_lock_try(m->object)) {
			if (!preflight) hibernate_stats.cd_lock_failed++;
			break;
		}
		object = m->object;

		if (VM_PAGE_WIRED(m)) {
			if (!preflight) hibernate_stats.cd_found_wired++;
			break;
		}
		if (m->precious) {
			if (!preflight) hibernate_stats.cd_found_precious++;
			break;
		}
		if (m->busy || !object->alive) {
			/*
			 *	Somebody is playing with this page.
			 */
			if (!preflight) hibernate_stats.cd_found_busy++;
			break;
		}
		if (m->absent || m->unusual || m->error) {
			/*
			 * If it's unusual in anyway, ignore it
			 */
			if (!preflight) hibernate_stats.cd_found_unusual++;
			break;
		}
		if (m->cleaning) {
			if (!preflight) hibernate_stats.cd_found_cleaning++;
			break;
		}
		if (m->laundry) {
			if (!preflight) hibernate_stats.cd_found_laundry++;
			break;
		}
		if (!m->dirty)
		{
			refmod_state = pmap_get_refmod(m->phys_page);

			if (refmod_state & VM_MEM_REFERENCED)
				m->reference = TRUE;
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}

		/*
		 * If it's clean or purgeable we can discard the page on wakeup.
		 */
		discard = (!m->dirty)
			    || (VM_PURGABLE_VOLATILE == object->purgable)
			    || (VM_PURGABLE_EMPTY == object->purgable);


		if (discard == FALSE) {
			if (!preflight)
				hibernate_stats.cd_found_dirty++;
		} else if (m->xpmapped && m->reference && !object->internal) {
			if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
				if (!preflight)
					hibernate_stats.cd_found_xpmapped++;
				discard = FALSE;
			} else {
				if (!preflight)
					hibernate_stats.cd_skipped_xpmapped++;
			}
		}
	}
	while (FALSE);

	if (object)
		vm_object_unlock(object);

	return (discard);
}
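
/*
 * hibernate_consider_discard() returns TRUE when a page's contents don't
 * need to be preserved in the image (clean, or belonging to a volatile or
 * empty purgeable object).  As a side effect it folds the pmap ref/mod bits
 * into m->reference / m->dirty, and the cd_* statistics record why pages
 * were kept; statistics updates are skipped when 'preflight' is set.
 */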

static void
hibernate_discard_page(vm_page_t m)
{
	if (m->absent || m->unusual || m->error)
		/*
		 * If it's unusual in anyway, ignore
		 */
		return;

#if MACH_ASSERT || DEBUG
	vm_object_t object = m->object;
	if (!vm_object_lock_try(m->object))
		panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
#else
	/* No need to lock page queue for token delete, hibernate_vm_unlock()
	   makes sure these locks are uncontended before sleep */
#endif	/* MACH_ASSERT || DEBUG */

	if (m->pmapped == TRUE)
	{
		__unused int refmod_state = pmap_disconnect(m->phys_page);
	}

	if (m->laundry)
		panic("hibernate_discard_page(%p) laundry", m);
	if (m->private)
		panic("hibernate_discard_page(%p) private", m);
	if (m->fictitious)
		panic("hibernate_discard_page(%p) fictitious", m);

	if (VM_PURGABLE_VOLATILE == m->object->purgable)
	{
		/* object should be on a queue */
		assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
		purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
		assert(old_queue);
		if (m->object->purgeable_when_ripe) {
			vm_purgeable_token_delete_first(old_queue);
		}
		m->object->purgable = VM_PURGABLE_EMPTY;

		/*
		 * Purgeable ledgers:  pages of VOLATILE and EMPTY objects are
		 * accounted in the "volatile" ledger, so no change here.
		 * We have to update vm_page_purgeable_count, though, since we're
		 * effectively purging this object.
		 */
		unsigned int delta;
		assert(m->object->resident_page_count >= m->object->wired_page_count);
		delta = (m->object->resident_page_count - m->object->wired_page_count);
		assert(vm_page_purgeable_count >= delta);
		OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
	}

	vm_page_free(m);

#if MACH_ASSERT || DEBUG
	vm_object_unlock(object);
#endif	/* MACH_ASSERT || DEBUG */
}

/*
 Grab locks for hibernate_page_list_setall()
*/
void
hibernate_vm_lock_queues(void)
{
	vm_object_lock(compressor_object);
	vm_page_lock_queues();
	lck_mtx_lock(&vm_page_queue_free_lock);

	if (vm_page_local_q) {
		uint32_t i;
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl *lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_LOCK(&lq->vpl_lock);
		}
	}
}

void
hibernate_vm_unlock_queues(void)
{
	if (vm_page_local_q) {
		uint32_t i;
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl *lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_UNLOCK(&lq->vpl_lock);
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);
	vm_page_unlock_queues();
	vm_object_unlock(compressor_object);
}
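
/*
 * Lock ordering here matches hibernate_page_list_setall(): the compressor
 * object lock is taken first, then the page queues lock, then
 * vm_page_queue_free_lock, and finally the per-CPU local queue locks;
 * hibernate_vm_unlock_queues() releases them in the reverse order.
 */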

/*
 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
 pages known to VM to not need saving are subtracted.
 Wired pages to be saved are present in page_list_wired, pageable in page_list.
*/

void
hibernate_page_list_setall(hibernate_page_list_t * page_list,
			   hibernate_page_list_t * page_list_wired,
			   hibernate_page_list_t * page_list_pal,
			   boolean_t preflight,
			   boolean_t will_discard,
			   uint32_t * pagesOut)
{
	uint64_t	start, end, nsec;
	vm_page_t	m;
	vm_page_t	next;
	uint32_t	pages = page_list->page_count;
	uint32_t	count_anonymous = 0, count_throttled = 0, count_compressor = 0;
	uint32_t	count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
	uint32_t	count_wire = pages;
	uint32_t	count_discard_active = 0;
	uint32_t	count_discard_inactive = 0;
	uint32_t	count_discard_cleaned = 0;
	uint32_t	count_discard_purgeable = 0;
	uint32_t	count_discard_speculative = 0;
	uint32_t	count_discard_vm_struct_pages = 0;
	uint32_t	i;
	uint32_t	bank;
	hibernate_bitmap_t * bitmap;
	hibernate_bitmap_t * bitmap_wired;
	boolean_t	discard_all;
	boolean_t	discard;

	HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);

	if (preflight) {
		page_list       = NULL;
		page_list_wired = NULL;
		page_list_pal   = NULL;
		discard_all     = FALSE;
	} else {
		discard_all     = will_discard;
	}

#if MACH_ASSERT || DEBUG
	if (!preflight)
	{
		vm_page_lock_queues();
		if (vm_page_local_q) {
			for (i = 0; i < vm_page_local_q_count; i++) {
				struct vpl *lq;
				lq = &vm_page_local_q[i].vpl_un.vpl;
				VPL_LOCK(&lq->vpl_lock);
			}
		}
	}
#endif	/* MACH_ASSERT || DEBUG */


	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);

	clock_get_uptime(&start);

	if (!preflight) {
		hibernate_page_list_zero(page_list);
		hibernate_page_list_zero(page_list_wired);
		hibernate_page_list_zero(page_list_pal);

		hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
		hibernate_stats.cd_pages = pages;
	}

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, !preflight);
	}

	if (preflight) {
		vm_object_lock(compressor_object);
		vm_page_lock_queues();
		lck_mtx_lock(&vm_page_queue_free_lock);
	}

	m = (vm_page_t) hibernate_gobble_queue;
	while (m)
	{
		pages--;
		count_wire--;
		if (!preflight) {
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		}
		m = (vm_page_t) m->pageq.next;
	}

	if (!preflight) for ( i = 0; i < real_ncpus; i++ )
	{
		if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
		{
			for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
			{
				pages--;
				count_wire--;
				hibernate_page_bitset(page_list, TRUE, m->phys_page);
				hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

				hibernate_stats.cd_local_free++;
				hibernate_stats.cd_total_free++;
			}
		}
	}

	for ( i = 0; i < vm_colors; i++ )
	{
		queue_iterate(&vm_page_queue_free[i],
			      m,
			      vm_page_t,
			      pageq)
		{
			pages--;
			count_wire--;
			if (!preflight) {
				hibernate_page_bitset(page_list, TRUE, m->phys_page);
				hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

				hibernate_stats.cd_total_free++;
			}
		}
	}

	queue_iterate(&vm_lopage_queue_free,
		      m,
		      vm_page_t,
		      pageq)
	{
		pages--;
		count_wire--;
		if (!preflight) {
			hibernate_page_bitset(page_list, TRUE, m->phys_page);
			hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

			hibernate_stats.cd_total_free++;
		}
	}

	m = (vm_page_t) queue_first(&vm_page_queue_throttled);
	while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			count_discard_inactive++;
			discard = discard_all;
		}
		else
			count_throttled++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

		if (discard) hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
	while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			discard = discard_all;
		}
		else
			count_anonymous++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard) hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
	while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_cleaned++;
			discard = discard_all;
		}
		else
			count_cleaned++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard) hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_active);
	while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
			discard = discard_all;
		}
		else
			count_active++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard) hibernate_discard_page(m);
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		discard = FALSE;
		if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
		 && hibernate_consider_discard(m, preflight))
		{
			if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			discard = discard_all;
		}
		else
			count_inactive++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
		if (discard) hibernate_discard_page(m);
		m = next;
	}

	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
		while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
		{
			next = (vm_page_t) m->pageq.next;
			discard = FALSE;
			if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
			 && hibernate_consider_discard(m, preflight))
			{
				if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
				count_discard_speculative++;
				discard = discard_all;
			}
			else
				count_speculative++;
			count_wire--;
			if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
			if (discard) hibernate_discard_page(m);
			m = next;
		}
	}

	queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
	{
		count_compressor++;
		count_wire--;
		if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}

	if (preflight == FALSE && discard_all == TRUE) {
		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);

		HIBLOG("hibernate_teardown started\n");
		count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
		HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);

		pages -= count_discard_vm_struct_pages;
		count_wire -= count_discard_vm_struct_pages;

		hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;

		KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
	}

	if (!preflight) {
		// pull wired from hibernate_bitmap
		bitmap = &page_list->bank_bitmap[0];
		bitmap_wired = &page_list_wired->bank_bitmap[0];
		for (bank = 0; bank < page_list->bank_count; bank++)
		{
			for (i = 0; i < bitmap->bitmapwords; i++)
				bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
			bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
			bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
		}
	}

	// machine dependent adjustments
	hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);

	hibernate_stats.cd_count_wire = count_wire;
	hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
				       count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);

	HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
	       pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
	       discard_all ? "did" : "could",
	       count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);

	if (hibernate_stats.cd_skipped_xpmapped)
		HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);

	*pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;

	if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;

#if MACH_ASSERT || DEBUG
	if (!preflight)
	{
		if (vm_page_local_q) {
			for (i = 0; i < vm_page_local_q_count; i++) {
				struct vpl *lq;
				lq = &vm_page_local_q[i].vpl_un.vpl;
				VPL_UNLOCK(&lq->vpl_lock);
			}
		}
		vm_page_unlock_queues();
	}
#endif	/* MACH_ASSERT || DEBUG */

	if (preflight) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		vm_page_unlock_queues();
		vm_object_unlock(compressor_object);
	}

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
}

void
hibernate_page_list_discard(hibernate_page_list_t * page_list)
{
	uint64_t	start, end, nsec;
	vm_page_t	m;
	vm_page_t	next;
	uint32_t	i;
	uint32_t	count_discard_active = 0;
	uint32_t	count_discard_inactive = 0;
	uint32_t	count_discard_purgeable = 0;
	uint32_t	count_discard_cleaned = 0;
	uint32_t	count_discard_speculative = 0;


#if MACH_ASSERT || DEBUG
	vm_page_lock_queues();
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl *lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_LOCK(&lq->vpl_lock);
		}
	}
#endif	/* MACH_ASSERT || DEBUG */

	clock_get_uptime(&start);

	m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
	while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
	{
		m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
		while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
		{
			next = (vm_page_t) m->pageq.next;
			if (hibernate_page_bittst(page_list, m->phys_page))
			{
				count_discard_speculative++;
				hibernate_discard_page(m);
			}
			m = next;
		}
	}

	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_inactive++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_active);
	while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_active++;
			hibernate_discard_page(m);
		}
		m = next;
	}

	m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
	while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
	{
		next = (vm_page_t) m->pageq.next;
		if (hibernate_page_bittst(page_list, m->phys_page))
		{
			if (m->dirty)
				count_discard_purgeable++;
			else
				count_discard_cleaned++;
			hibernate_discard_page(m);
		}
		m = next;
	}

#if MACH_ASSERT || DEBUG
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++) {
			struct vpl *lq;
			lq = &vm_page_local_q[i].vpl_un.vpl;
			VPL_UNLOCK(&lq->vpl_lock);
		}
	}
	vm_page_unlock_queues();
#endif	/* MACH_ASSERT || DEBUG */

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
	       nsec / 1000000ULL,
	       count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
}

boolean_t	hibernate_paddr_map_inited = FALSE;
boolean_t	hibernate_rebuild_needed = FALSE;
unsigned int	hibernate_teardown_last_valid_compact_indx = -1;
vm_page_t	hibernate_rebuild_hash_list = NULL;

unsigned int	hibernate_teardown_found_tabled_pages = 0;
unsigned int	hibernate_teardown_found_created_pages = 0;
unsigned int	hibernate_teardown_found_free_pages = 0;
unsigned int	hibernate_teardown_vm_page_free_count;


struct ppnum_mapping {
	struct ppnum_mapping	*ppnm_next;
	ppnum_t			ppnm_base_paddr;
	unsigned int		ppnm_sindx;
	unsigned int		ppnm_eindx;
};

struct ppnum_mapping	*ppnm_head;
struct ppnum_mapping	*ppnm_last_found = NULL;

void
hibernate_create_paddr_map()
{
	unsigned int	i;
	ppnum_t		next_ppnum_in_run = 0;
	struct ppnum_mapping *ppnm = NULL;

	if (hibernate_paddr_map_inited == FALSE) {

		for (i = 0; i < vm_pages_count; i++) {

			if (ppnm)
				ppnm->ppnm_eindx = i;

			if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {

				ppnm = kalloc(sizeof(struct ppnum_mapping));

				ppnm->ppnm_next = ppnm_head;
				ppnm_head = ppnm;

				ppnm->ppnm_sindx = i;
				ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
			}
			next_ppnum_in_run = vm_pages[i].phys_page + 1;
		}
		ppnm->ppnm_eindx++;

		hibernate_paddr_map_inited = TRUE;
	}
}

ppnum_t
hibernate_lookup_paddr(unsigned int indx)
{
	struct ppnum_mapping *ppnm = NULL;

	ppnm = ppnm_last_found;

	if (ppnm) {
		if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
			goto done;
	}
	for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {

		if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
			ppnm_last_found = ppnm;
			break;
		}
	}
	if (ppnm == NULL)
		panic("hibernate_lookup_paddr of %d failed\n", indx);
done:
	return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
}
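
/*
 * Example (illustrative values only): if a ppnum_mapping records the run
 * { ppnm_sindx = 100, ppnm_eindx = 164, ppnm_base_paddr = 0x80000 },
 * then hibernate_lookup_paddr(130) returns 0x80000 + (130 - 100) = 0x8001e.
 */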

uint32_t
hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
	addr64_t	saddr_aligned;
	addr64_t	eaddr_aligned;
	addr64_t	addr;
	ppnum_t		paddr;
	unsigned int	mark_as_unneeded_pages = 0;

	saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
	eaddr_aligned = eaddr & ~PAGE_MASK_64;

	for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {

		paddr = pmap_find_phys(kernel_pmap, addr);

		assert(paddr);

		hibernate_page_bitset(page_list, TRUE, paddr);
		hibernate_page_bitset(page_list_wired, TRUE, paddr);

		mark_as_unneeded_pages++;
	}
	return (mark_as_unneeded_pages);
}

void
hibernate_hash_insert_page(vm_page_t mem)
{
	vm_page_bucket_t *bucket;
	int		hash_id;

	assert(mem->hashed);
	assert(mem->object);
	assert(mem->offset != (vm_object_offset_t) -1);

	/*
	 *	Insert it into the object/offset hash table
	 */
	hash_id = vm_page_hash(mem->object, mem->offset);
	bucket = &vm_page_buckets[hash_id];

	mem->next_m = bucket->page_list;
	bucket->page_list = VM_PAGE_PACK_PTR(mem);
}

void
hibernate_free_range(int sindx, int eindx)
{
	vm_page_t	mem;
	unsigned int	color;

	while (sindx < eindx) {
		mem = &vm_pages[sindx];

		vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);

		mem->lopage = FALSE;
		mem->free = TRUE;

		color = mem->phys_page & vm_color_mask;
		queue_enter_first(&vm_page_queue_free[color],
				  mem,
				  vm_page_t,
				  pageq);
		vm_page_free_count++;

		sindx++;
	}
}

extern void hibernate_rebuild_pmap_structs(void);

void
hibernate_rebuild_vm_structs(void)
{
	int		cindx, sindx, eindx;
	vm_page_t	mem, tmem, mem_next;
	AbsoluteTime	startTime, endTime;
	uint64_t	nsec;

	if (hibernate_rebuild_needed == FALSE)
		return;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
	HIBLOG("hibernate_rebuild started\n");

	clock_get_uptime(&startTime);

	hibernate_rebuild_pmap_structs();

	bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
	eindx = vm_pages_count;

	for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {

		mem = &vm_pages[cindx];
		/*
		 * hibernate_teardown_vm_structs leaves the location where
		 * this vm_page_t must be located in "next".
		 */
		tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
		mem->next_m = VM_PAGE_PACK_PTR(NULL);

		sindx = (int)(tmem - &vm_pages[0]);

		if (mem != tmem) {
			/*
			 * this vm_page_t was moved by hibernate_teardown_vm_structs,
			 * so move it back to its real location
			 */
			*tmem = *mem;
			mem = tmem;
		}
		if (mem->hashed)
			hibernate_hash_insert_page(mem);
		/*
		 * the 'hole' between this vm_page_t and the previous
		 * vm_page_t we moved needs to be initialized as
		 * a range of free vm_page_t's
		 */
		hibernate_free_range(sindx + 1, eindx);

		eindx = sindx;
	}
	if (sindx)
		hibernate_free_range(0, sindx);

	assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);

	/*
	 * process the list of vm_page_t's that were entered in the hash,
	 * but were not located in the vm_pages array... these are
	 * vm_page_t's that were created on the fly (i.e. fictitious)
	 */
	for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
		mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);

		mem->next_m = VM_PAGE_PACK_PTR(NULL);
		hibernate_hash_insert_page(mem);
	}
	hibernate_rebuild_hash_list = NULL;

	clock_get_uptime(&endTime);
	SUB_ABSOLUTETIME(&endTime, &startTime);
	absolutetime_to_nanoseconds(endTime, &nsec);

	HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);

	hibernate_rebuild_needed = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
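
/*
 * hibernate_teardown_vm_structs() below is the counterpart of the rebuild
 * above: it compacts the vm_pages[] array over the slots of free pages,
 * stashes hashed pages that live outside vm_pages[] on
 * hibernate_rebuild_hash_list, and marks the vacated vm_page_buckets[],
 * vm_pages[] and pmap ranges as not needing to be written into the image.
 */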

extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);

uint32_t
hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
	unsigned int	i;
	unsigned int	compact_target_indx;
	vm_page_t	mem, mem_next;
	vm_page_bucket_t *bucket;
	unsigned int	mark_as_unneeded_pages = 0;
	unsigned int	unneeded_vm_page_bucket_pages = 0;
	unsigned int	unneeded_vm_pages_pages = 0;
	unsigned int	unneeded_pmap_pages = 0;
	addr64_t	start_of_unneeded = 0;
	addr64_t	end_of_unneeded = 0;


	if (hibernate_should_abort())
		return (0);

	HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
	       vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
	       vm_page_cleaned_count, compressor_object->resident_page_count);

	for (i = 0; i < vm_page_bucket_count; i++) {

		bucket = &vm_page_buckets[i];

		for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
			assert(mem->hashed);

			mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);

			if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
				mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
				hibernate_rebuild_hash_list = mem;
			}
		}
	}
	unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;

	hibernate_teardown_vm_page_free_count = vm_page_free_count;

	compact_target_indx = 0;

	for (i = 0; i < vm_pages_count; i++) {

		mem = &vm_pages[i];

		if (mem->free) {
			unsigned int color;

			assert(mem->busy);
			assert(!mem->lopage);

			color = mem->phys_page & vm_color_mask;

			queue_remove(&vm_page_queue_free[color],
				     mem,
				     vm_page_t,
				     pageq);
			mem->pageq.next = NULL;
			mem->pageq.prev = NULL;

			vm_page_free_count--;

			hibernate_teardown_found_free_pages++;

			if ( !vm_pages[compact_target_indx].free)
				compact_target_indx = i;
		} else {
			/*
			 * record this vm_page_t's original location
			 * we need this even if it doesn't get moved
			 * as an indicator to the rebuild function that
			 * we don't have to move it
			 */
			mem->next_m = VM_PAGE_PACK_PTR(mem);

			if (vm_pages[compact_target_indx].free) {
				/*
				 * we've got a hole to fill, so
				 * move this vm_page_t to its new home
				 */
				vm_pages[compact_target_indx] = *mem;
				mem->free = TRUE;

				hibernate_teardown_last_valid_compact_indx = compact_target_indx;
				compact_target_indx++;
			} else
				hibernate_teardown_last_valid_compact_indx = i;
		}
	}
	unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
							     (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_pages_pages;

	hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);

	if (start_of_unneeded) {
		unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
		mark_as_unneeded_pages += unneeded_pmap_pages;
	}
	HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);

	hibernate_rebuild_needed = TRUE;

	return (mark_as_unneeded_pages);
}


#endif /* HIBERNATION */

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *	Routine:	vm_page_info
 *	Purpose:
 *		Return information about the global VP table.
 *		Fills the buffer with as much information as possible
 *		and returns the desired size of the buffer.
 *	Conditions:
 *		Nothing locked.  The caller should provide
 *		possibly-pageable memory.
 */

unsigned int
vm_page_info(
	hash_info_bucket_t *info,
	unsigned int count)
{
	unsigned int i;
	lck_spin_t   *bucket_lock;

	if (vm_page_bucket_count < count)
		count = vm_page_bucket_count;

	for (i = 0; i < count; i++) {
		vm_page_bucket_t *bucket = &vm_page_buckets[i];
		unsigned int bucket_count = 0;
		vm_page_t m;

		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
		lck_spin_lock(bucket_lock);

		for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
			bucket_count++;

		lck_spin_unlock(bucket_lock);

		/* don't touch pageable memory while holding locks */
		info[i].hib_count = bucket_count;
	}

	return vm_page_bucket_count;
}
#endif	/* MACH_VM_DEBUG */

#if VM_PAGE_BUCKETS_CHECK
void
vm_page_buckets_check(void)
{
	unsigned int i;
	vm_page_t p;
	unsigned int p_hash;
	vm_page_bucket_t *bucket;
	lck_spin_t *bucket_lock;

	if (!vm_page_buckets_check_ready) {
		return;
	}

#if HIBERNATION
	if (hibernate_rebuild_needed ||
	    hibernate_rebuild_hash_list) {
		panic("BUCKET_CHECK: hibernation in progress: "
		      "rebuild_needed=%d rebuild_hash_list=%p\n",
		      hibernate_rebuild_needed,
		      hibernate_rebuild_hash_list);
	}
#endif /* HIBERNATION */

#if VM_PAGE_FAKE_BUCKETS
	char *cp;
	for (cp = (char *) vm_page_fake_buckets_start;
	     cp < (char *) vm_page_fake_buckets_end;
	     cp++) {
		if (*cp != 0x5a) {
			panic("BUCKET_CHECK: corruption at %p in fake buckets "
			      "[0x%llx:0x%llx]\n",
			      cp,
			      (uint64_t) vm_page_fake_buckets_start,
			      (uint64_t) vm_page_fake_buckets_end);
		}
	}
#endif /* VM_PAGE_FAKE_BUCKETS */

	for (i = 0; i < vm_page_bucket_count; i++) {
		bucket = &vm_page_buckets[i];
		if (!bucket->page_list) {
			continue;
		}

		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
		lck_spin_lock(bucket_lock);
		p = VM_PAGE_UNPACK_PTR(bucket->page_list);
		while (p != VM_PAGE_NULL) {
			if (!p->hashed) {
				panic("BUCKET_CHECK: page %p (%p,0x%llx) "
				      "hash %d in bucket %d at %p "
				      "is not hashed\n",
				      p, p->object, p->offset,
				      p_hash, i, bucket);
			}
			p_hash = vm_page_hash(p->object, p->offset);
			if (p_hash != i) {
				panic("BUCKET_CHECK: corruption in bucket %d "
				      "at %p: page %p object %p offset 0x%llx "
				      "hash %d\n",
				      i, bucket, p, p->object, p->offset,
				      p_hash);
			}
			p = VM_PAGE_UNPACK_PTR(p->next_m);
		}
		lck_spin_unlock(bucket_lock);
	}

//	printf("BUCKET_CHECK: checked buckets\n");
}
#endif /* VM_PAGE_BUCKETS_CHECK */
/*
 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
 * local queues if they exist... it's the only spot in the system where we add pages
 * to those queues... once on those queues, those pages can only move to one of the
 * global page queues or the free queues... they NEVER move from local q to local q.
 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
 * the global vm_page_queue_lock at this point... we still need to take the local lock
 * in case this operation is being run on a different CPU than the local queue's identity,
 * but we don't have to worry about the page moving to a global queue or becoming wired
 * while we're grabbing the local lock since those operations would require the global
 * vm_page_queue_lock to be held, and we already own it.
 *
 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
 * 'wired' and local are ALWAYS mutually exclusive conditions.
 */
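/*
 * Illustrative caller pattern (a sketch, not a call site copied from this
 * file): removal is always performed behind the global page-queue lock,
 * e.g.
 *
 *	vm_page_lockspin_queues();
 *	vm_page_queues_remove(m);
 *	vm_page_unlock_queues();
 *
 * the per-CPU local-queue lock is then taken briefly inside
 * vm_page_queues_remove itself when the page happens to be on a local queue.
 */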
void
vm_page_queues_remove(vm_page_t mem)
{
	boolean_t	was_pageable;

	VM_PAGE_QUEUES_ASSERT(mem, 1);
	assert(!mem->pageout_queue);
	/*
	 *	if (mem->pageout_queue)
	 *		NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
	 *		the caller is responsible for determining if the page is on that queue, and if so, must
	 *		either first remove it (it needs both the page queues lock and the object lock to do
	 *		this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
	 */
	if (mem->local) {
		struct vpl	*lq;

		assert(mem->object != kernel_object);
		assert(mem->object != compressor_object);
		assert(!mem->inactive && !mem->speculative);
		assert(!mem->active && !mem->throttled);
		assert(!mem->clean_queue);
		assert(!mem->fictitious);
		lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
		VPL_LOCK(&lq->vpl_lock);
		queue_remove(&lq->vpl_queue,
			     mem, vm_page_t, pageq);
		mem->local = FALSE;
		mem->local_id = 0;
		lq->vpl_count--;
		if (mem->object->internal) {
			lq->vpl_internal_count--;
		} else {
			lq->vpl_external_count--;
		}
		VPL_UNLOCK(&lq->vpl_lock);
		was_pageable = FALSE;

	} else if (mem->active) {
		assert(mem->object != kernel_object);
		assert(mem->object != compressor_object);
		assert(!mem->inactive && !mem->speculative);
		assert(!mem->clean_queue);
		assert(!mem->throttled);
		assert(!mem->fictitious);
		queue_remove(&vm_page_queue_active,
			     mem, vm_page_t, pageq);
		mem->active = FALSE;
		vm_page_active_count--;
		was_pageable = TRUE;

	} else if (mem->inactive) {
		assert(mem->object != kernel_object);
		assert(mem->object != compressor_object);
		assert(!mem->active && !mem->speculative);
		assert(!mem->throttled);
		assert(!mem->fictitious);
		vm_page_inactive_count--;
		if (mem->clean_queue) {
			queue_remove(&vm_page_queue_cleaned,
				     mem, vm_page_t, pageq);
			mem->clean_queue = FALSE;
			vm_page_cleaned_count--;
		} else {
			if (mem->object->internal) {
				queue_remove(&vm_page_queue_anonymous,
					     mem, vm_page_t, pageq);
				vm_page_anonymous_count--;
			} else {
				queue_remove(&vm_page_queue_inactive,
					     mem, vm_page_t, pageq);
			}
			vm_purgeable_q_advance_all();
		}
		mem->inactive = FALSE;
		was_pageable = TRUE;

	} else if (mem->throttled) {
		assert(mem->object != compressor_object);
		assert(!mem->active && !mem->inactive);
		assert(!mem->speculative);
		assert(!mem->fictitious);
		queue_remove(&vm_page_queue_throttled,
			     mem, vm_page_t, pageq);
		mem->throttled = FALSE;
		vm_page_throttled_count--;
		was_pageable = FALSE;

	} else if (mem->speculative) {
		assert(mem->object != compressor_object);
		assert(!mem->active && !mem->inactive);
		assert(!mem->throttled);
		assert(!mem->fictitious);
		remque(&mem->pageq);
		mem->speculative = FALSE;
		vm_page_speculative_count--;
		was_pageable = TRUE;

	} else if (mem->pageq.next || mem->pageq.prev) {
		was_pageable = FALSE;
		panic("vm_page_queues_remove: unmarked page on Q");
	} else {
		was_pageable = FALSE;
	}

	mem->pageq.next = NULL;
	mem->pageq.prev = NULL;
	VM_PAGE_QUEUES_ASSERT(mem, 0);

	if (was_pageable) {
		if (mem->object->internal) {
			vm_page_pageable_internal_count--;
		} else {
			vm_page_pageable_external_count--;
		}
	}
}
void
vm_page_remove_internal(vm_page_t page)
{
	vm_object_t __object = page->object;

	if (page == __object->memq_hint) {
		vm_page_t	__new_hint;
		queue_entry_t	__qe;

		__qe = queue_next(&page->listq);
		if (queue_end(&__object->memq, __qe)) {
			__qe = queue_prev(&page->listq);
			if (queue_end(&__object->memq, __qe)) {
				__qe = NULL;
			}
		}
		__new_hint = (vm_page_t) __qe;
		__object->memq_hint = __new_hint;
	}
	queue_remove(&__object->memq, page, vm_page_t, listq);
}
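/*
 * Hint behavior sketch (illustrative, not from the original source): when
 * the page being removed is the object's memq_hint, the hint is advanced to
 * the next page on the object's memq, falls back to the previous page if the
 * removed page was at the tail, and is cleared to NULL when the removed page
 * was the only entry on the queue.
 */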
void
vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
{
	VM_PAGE_QUEUES_ASSERT(mem, 0);
	assert(!mem->fictitious);
	assert(!mem->laundry);
	assert(!mem->pageout_queue);
	vm_page_check_pageable_safe(mem);

	if (mem->object->internal) {
		if (first == TRUE)
			queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
		else
			queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
		vm_page_anonymous_count++;
		vm_page_pageable_internal_count++;
	} else {
		if (first == TRUE)
			queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq);
		else
			queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);
		vm_page_pageable_external_count++;
	}
	mem->inactive = TRUE;
	vm_page_inactive_count++;
	token_new_pagecount++;
}
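/*
 * Usage note (illustrative, not from the original source): the 'first'
 * argument selects head vs. tail insertion, so vm_page_enqueue_inactive(m, TRUE)
 * places m at the head of the queue where the pageout scan will reach it
 * sooner, while vm_page_enqueue_inactive(m, FALSE) gives it a full trip
 * through the queue; internal (anonymous) pages go to vm_page_queue_anonymous
 * and external pages to vm_page_queue_inactive.
 */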
/*
 * Pages from special kernel objects shouldn't
 * be placed on pageable queues.
 */
void
vm_page_check_pageable_safe(vm_page_t page)
{
	if (page->object == kernel_object) {
		panic("vm_page_check_pageable_safe: trying to add page "
		      "from kernel object (%p) to pageable queue", kernel_object);
	}

	if (page->object == compressor_object) {
		panic("vm_page_check_pageable_safe: trying to add page "
		      "from compressor object (%p) to pageable queue", compressor_object);
	}

	if (page->object == vm_submap_object) {
		panic("vm_page_check_pageable_safe: trying to add page "
		      "from submap object (%p) to pageable queue", vm_submap_object);
	}
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 * wired page diagnose
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <libkern/OSKextLibPrivate.h>

vm_allocation_site_t *
vm_allocation_sites[VM_KERN_MEMORY_COUNT];
static vm_tag_t
vm_tag_bt(void)
{
	uintptr_t* frameptr;
	uintptr_t* frameptr_next;
	uintptr_t retaddr;
	uintptr_t kstackb, kstackt;
	const vm_allocation_site_t * site;
	thread_t cthread;

	cthread = current_thread();
	if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;

	kstackb = cthread->kernel_stack;
	kstackt = kstackb + kernel_stack_size;
	site = NULL;

	/* Load stack frame pointer (EBP on x86) into frameptr */
	frameptr = __builtin_frame_address(0);

	while (frameptr != NULL)
	{
		/* Verify thread stack bounds */
		if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;

		/* Next frame pointer is pointed to by the previous one */
		frameptr_next = (uintptr_t*) *frameptr;

		/* Pull return address from one spot above the frame pointer */
		retaddr = *(frameptr + 1);

		if ((retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
		{
			site = OSKextGetAllocationSiteForCaller(retaddr);
			break;
		}

		frameptr = frameptr_next;
	}

	return (site ? site->tag : VM_KERN_MEMORY_NONE);
}
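/*
 * Frame-walk sketch (illustrative, not from the original source): with frame
 * pointers enabled, each stack frame looks like
 *
 *	frameptr[0]  ->  saved frame pointer of the caller
 *	frameptr[1]  ->  return address into the caller
 *
 * so the loop above hops from frame to frame until it either leaves the
 * thread's kernel stack bounds or finds a return address outside the core
 * kernel text, which it then resolves to a kext allocation site via
 * OSKextGetAllocationSiteForCaller().
 */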
static uint64_t free_tag_bits[256/64];

void
vm_tag_alloc_locked(vm_allocation_site_t * site)
{
	vm_tag_t tag;
	uint64_t avail;
	uint64_t idx;

	if (site->tag) return;

	idx = 0;
	while (TRUE)
	{
		avail = free_tag_bits[idx];
		if (avail)
		{
			tag = __builtin_clzll(avail);
			avail &= ~(1ULL << (63 - tag));
			free_tag_bits[idx] = avail;
			tag += (idx << 6);
			break;
		}
		idx++;
		if (idx >= (sizeof(free_tag_bits) / sizeof(free_tag_bits[0])))
		{
			tag = VM_KERN_MEMORY_ANY;
			break;
		}
	}
	site->tag = tag;

	if (VM_KERN_MEMORY_ANY != tag)
	{
		assert(!vm_allocation_sites[tag]);
		vm_allocation_sites[tag] = site;
	}
}
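/*
 * Worked example of the bitmap scan above (a sketch, not from the original
 * source): free_tag_bits[] holds one bit per tag, 64 tags per word, with a
 * set bit meaning "free".  If word idx == 1 has only bit (63 - 6) set, then
 * __builtin_clzll() returns 6, that bit is cleared to mark the tag taken,
 * and the allocated tag is 6 + (1 << 6) == 70.  When every word is empty the
 * site falls back to VM_KERN_MEMORY_ANY.
 */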
static void
vm_tag_free_locked(vm_tag_t tag)
{
	uint64_t avail;
	uint32_t idx;
	uint64_t bit;

	if (VM_KERN_MEMORY_ANY == tag) return;

	idx = (tag >> 6);
	avail = free_tag_bits[idx];

	tag &= 63;
	bit = (1ULL << (63 - tag));
	assert(!(avail & bit));
	free_tag_bits[idx] = (avail | bit);
}

static void
vm_tag_init(void)
{
	vm_tag_t tag;

	for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
	{
		vm_tag_free_locked(tag);
	}
}
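/*
 * Worked example of the free path (illustrative, not from the original
 * source): freeing tag 70 computes idx = 70 >> 6 == 1 and bit position
 * 63 - (70 & 63) == 57, asserts that bit 57 of free_tag_bits[1] is currently
 * clear (i.e. the tag really was allocated), and then sets it again.  The
 * init loop simply "frees" every tag in the dynamic range
 * [VM_KERN_MEMORY_FIRST_DYNAMIC, VM_KERN_MEMORY_ANY) so they start out
 * available.
 */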
vm_tag_t
vm_tag_alloc(vm_allocation_site_t * site)
{
	vm_tag_t tag;

	if (VM_TAG_BT & site->flags)
	{
		tag = vm_tag_bt();
		if (VM_KERN_MEMORY_NONE != tag) return (tag);
	}

	if (!site->tag)
	{
		lck_spin_lock(&vm_allocation_sites_lock);
		vm_tag_alloc_locked(site);
		lck_spin_unlock(&vm_allocation_sites_lock);
	}

	return (site->tag);
}
static void
vm_page_count_object(mach_memory_info_t * sites, unsigned int __unused num_sites, vm_object_t object)
{
	if (!object->wired_page_count) return;
	if (object != kernel_object)
	{
		assert(object->wire_tag < num_sites);
		sites[object->wire_tag].size += ptoa_64(object->wired_page_count);
	}
}

typedef void (*vm_page_iterate_proc)(mach_memory_info_t * sites,
				     unsigned int num_sites, vm_object_t object);
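/*
 * Callback sketch (hypothetical, for illustration only -- not part of this
 * file): any function matching vm_page_iterate_proc can be handed to
 * vm_page_iterate_objects() below, e.g.
 *
 *	static void
 *	example_count_objects(mach_memory_info_t * sites,
 *			      unsigned int num_sites, vm_object_t object)
 *	{
 *		(void) sites; (void) num_sites; (void) object;
 *		// inspect one wired/purgeable object per invocation
 *	}
 *
 * vm_page_count_object() above is the callback actually used by
 * vm_page_diagnose().
 */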
static void
vm_page_iterate_purgeable_objects(mach_memory_info_t * sites, unsigned int num_sites,
				  vm_page_iterate_proc proc, purgeable_q_t queue,
				  int group)
{
	vm_object_t object;

	for (object = (vm_object_t) queue_first(&queue->objq[group]);
	     !queue_end(&queue->objq[group], (queue_entry_t) object);
	     object = (vm_object_t) queue_next(&object->objq))
	{
		proc(sites, num_sites, object);
	}
}
static void
vm_page_iterate_objects(mach_memory_info_t * sites, unsigned int num_sites,
			vm_page_iterate_proc proc)
{
	purgeable_q_t	volatile_q;
	queue_head_t *	nonvolatile_q;
	vm_object_t	object;
	int		group;

	lck_spin_lock(&vm_objects_wired_lock);
	queue_iterate(&vm_objects_wired,
		      object,
		      vm_object_t,
		      objq)
	{
		proc(sites, num_sites, object);
	}
	lck_spin_unlock(&vm_objects_wired_lock);

	lck_mtx_lock(&vm_purgeable_queue_lock);
	nonvolatile_q = &purgeable_nonvolatile_queue;
	for (object = (vm_object_t) queue_first(nonvolatile_q);
	     !queue_end(nonvolatile_q, (queue_entry_t) object);
	     object = (vm_object_t) queue_next(&object->objq))
	{
		proc(sites, num_sites, object);
	}

	volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
	vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, 0);

	volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
	for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
	{
		vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
	}

	volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
	for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
	{
		vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
	}
	lck_mtx_unlock(&vm_purgeable_queue_lock);
}
static void
process_account(mach_memory_info_t * sites, unsigned int __unused num_sites)
{
	uint64_t		found = 0;
	unsigned int		idx;
	vm_allocation_site_t *	site;

	assert(num_sites >= VM_KERN_MEMORY_COUNT);

	for (idx = 0; idx < VM_KERN_MEMORY_COUNT; idx++)
	{
		found += sites[idx].size;
		if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
		{
			sites[idx].site   = idx;
			sites[idx].flags |= VM_KERN_SITE_TAG;
			if (VM_KERN_MEMORY_ZONE == idx) sites[idx].flags |= VM_KERN_SITE_HIDE;
			else				sites[idx].flags |= VM_KERN_SITE_WIRED;
			continue;
		}
		lck_spin_lock(&vm_allocation_sites_lock);
		if ((site = vm_allocation_sites[idx]))
		{
			if (sites[idx].size)
			{
				sites[idx].flags |= VM_KERN_SITE_WIRED;
				if (VM_TAG_KMOD == (VM_KERN_SITE_TYPE & site->flags))
				{
					sites[idx].site   = OSKextGetKmodIDForSite(site);
					sites[idx].flags |= VM_KERN_SITE_KMOD;
				}
				else
				{
					sites[idx].site   = VM_KERNEL_UNSLIDE(site);
					sites[idx].flags |= VM_KERN_SITE_KERNEL;
				}
				site = NULL;
			}
			else
			{
				/* this code would free a site with no allocations but can race a new
				 * allocation being made */
				vm_tag_free_locked(site->tag);
				site->tag = VM_KERN_MEMORY_NONE;
				vm_allocation_sites[idx] = NULL;
				if (!(VM_TAG_UNLOAD & site->flags)) site = NULL;
			}
		}
		lck_spin_unlock(&vm_allocation_sites_lock);
		if (site) OSKextFreeSite(site);
	}
}
kern_return_t
vm_page_diagnose(mach_memory_info_t * sites, unsigned int num_sites)
{
	enum			{ kMaxKernelDepth = 1 };
	vm_map_t		maps   [kMaxKernelDepth];
	vm_map_entry_t		entries[kMaxKernelDepth];
	vm_map_t		map;
	vm_map_entry_t		entry;
	vm_object_offset_t	offset;
	vm_page_t		page;
	int			stackIdx, count;
	uint64_t		wired_size;
	uint64_t		wired_managed_size;
	uint64_t		wired_reserved_size;
	mach_memory_info_t *	counts;

	bzero(sites, num_sites * sizeof(mach_memory_info_t));

	vm_page_iterate_objects(sites, num_sites, &vm_page_count_object);

	wired_size          = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
	wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
	wired_managed_size  = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);

	assert(num_sites >= (VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT));
	counts = &sites[VM_KERN_MEMORY_COUNT];

#define SET_COUNT(xcount, xsize, xflags)			\
	counts[xcount].site  = (xcount);			\
	counts[xcount].size  = (xsize);				\
	counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;

	SET_COUNT(VM_KERN_COUNT_MANAGED,	ptoa_64(vm_page_pages),		0);
	SET_COUNT(VM_KERN_COUNT_WIRED,		wired_size,			0);
	SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED,	wired_managed_size,		0);
	SET_COUNT(VM_KERN_COUNT_RESERVED,	wired_reserved_size,		VM_KERN_SITE_WIRED);
	SET_COUNT(VM_KERN_COUNT_STOLEN,		ptoa_64(vm_page_stolen_count),	VM_KERN_SITE_WIRED);
	SET_COUNT(VM_KERN_COUNT_LOPAGE,		ptoa_64(vm_lopage_free_count),	VM_KERN_SITE_WIRED);

#define SET_MAP(xcount, xsize, xfree, xlargest)	\
	counts[xcount].site    = (xcount);	\
	counts[xcount].size    = (xsize);	\
	counts[xcount].free    = (xfree);	\
	counts[xcount].largest = (xlargest);	\
	counts[xcount].flags   = VM_KERN_SITE_COUNTER;

	vm_map_size_t map_size, map_free, map_largest;

	vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
	SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);

	vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
	SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);

	vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
	SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);

	map = kernel_map;
	stackIdx = 0;
	while (map)
	{
		vm_map_lock(map);
		for (entry = map->hdr.links.next; map; entry = entry->links.next)
		{
			if (entry->is_sub_map)
			{
				assert(stackIdx < kMaxKernelDepth);
				maps[stackIdx]    = map;
				entries[stackIdx] = entry;
				stackIdx++;
				map = VME_SUBMAP(entry);
				break;
			}
			if (VME_OBJECT(entry) == kernel_object)
			{
				count = 0;
				vm_object_lock(VME_OBJECT(entry));
				for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
				{
					page = vm_page_lookup(VME_OBJECT(entry), offset);
					if (page && VM_PAGE_WIRED(page)) count++;
				}
				vm_object_unlock(VME_OBJECT(entry));

				if (count)
				{
					assert(VME_ALIAS(entry) < num_sites);
					sites[VME_ALIAS(entry)].size += ptoa_64(count);
				}
			}
			if (entry == vm_map_last_entry(map))
			{
				vm_map_unlock(map);
				if (!stackIdx) map = NULL;
				else
				{
					--stackIdx;
					map   = maps[stackIdx];
					entry = entries[stackIdx];
				}
			}
		}
	}

	process_account(sites, num_sites);

	return (KERN_SUCCESS);
}
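/*
 * Layout note (illustrative, not from the original source): the caller's
 * buffer is expected to hold at least VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT
 * entries.  The first VM_KERN_MEMORY_COUNT slots report per-tag wired sizes
 * (indexed by vm_tag_t), and the slots starting at
 * counts = &sites[VM_KERN_MEMORY_COUNT] carry the global counters and map
 * sizes filled in by SET_COUNT()/SET_MAP() above.
 */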