2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
62 * Resident memory management module.
66 #include <libkern/OSAtomic.h>
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
88 #include <pexpert/pexpert.h>
90 #include <vm/vm_protos.h>
91 #include <vm/memory_object.h>
92 #include <vm/vm_purgeable_internal.h>
93 #include <vm/vm_compressor.h>
95 #include <IOKit/IOHibernatePrivate.h>
97 #include <sys/kdebug.h>
99 boolean_t hibernate_cleaning_in_progress
= FALSE
;
100 boolean_t vm_page_free_verify
= TRUE
;
102 uint32_t vm_lopage_free_count
= 0;
103 uint32_t vm_lopage_free_limit
= 0;
104 uint32_t vm_lopage_lowater
= 0;
105 boolean_t vm_lopage_refill
= FALSE
;
106 boolean_t vm_lopage_needed
= FALSE
;
108 lck_mtx_ext_t vm_page_queue_lock_ext
;
109 lck_mtx_ext_t vm_page_queue_free_lock_ext
;
110 lck_mtx_ext_t vm_purgeable_queue_lock_ext
;
112 int speculative_age_index
= 0;
113 int speculative_steal_index
= 0;
114 struct vm_speculative_age_q vm_page_queue_speculative
[VM_PAGE_MAX_SPECULATIVE_AGE_Q
+ 1];
117 __private_extern__
void vm_page_init_lck_grp(void);
119 static void vm_page_free_prepare(vm_page_t page
);
120 static vm_page_t
vm_page_grab_fictitious_common(ppnum_t phys_addr
);
126 * Associated with page of user-allocatable memory is a
131 * These variables record the values returned by vm_page_bootstrap,
132 * for debugging purposes. The implementation of pmap_steal_memory
133 * and pmap_startup here also uses them internally.
136 vm_offset_t virtual_space_start
;
137 vm_offset_t virtual_space_end
;
138 uint32_t vm_page_pages
;
141 * The vm_page_lookup() routine, which provides for fast
142 * (virtual memory object, offset) to page lookup, employs
143 * the following hash table. The vm_page_{insert,remove}
144 * routines install and remove associations in the table.
145 * [This table is often called the virtual-to-physical,
150 #if MACH_PAGE_HASH_STATS
151 int cur_count
; /* current count */
152 int hi_count
; /* high water mark */
153 #endif /* MACH_PAGE_HASH_STATS */
157 #define BUCKETS_PER_LOCK 16
159 vm_page_bucket_t
*vm_page_buckets
; /* Array of buckets */
160 unsigned int vm_page_bucket_count
= 0; /* How big is array? */
161 unsigned int vm_page_hash_mask
; /* Mask for hash function */
162 unsigned int vm_page_hash_shift
; /* Shift for hash function */
163 uint32_t vm_page_bucket_hash
; /* Basic bucket hash */
164 unsigned int vm_page_bucket_lock_count
= 0; /* How big is array of locks? */
166 lck_spin_t
*vm_page_bucket_locks
;
169 #if MACH_PAGE_HASH_STATS
170 /* This routine is only for debug. It is intended to be called by
171 * hand by a developer using a kernel debugger. This routine prints
172 * out vm_page_hash table statistics to the kernel debug console.
182 for (i
= 0; i
< vm_page_bucket_count
; i
++) {
183 if (vm_page_buckets
[i
].hi_count
) {
185 highsum
+= vm_page_buckets
[i
].hi_count
;
186 if (vm_page_buckets
[i
].hi_count
> maxdepth
)
187 maxdepth
= vm_page_buckets
[i
].hi_count
;
190 printf("Total number of buckets: %d\n", vm_page_bucket_count
);
191 printf("Number used buckets: %d = %d%%\n",
192 numbuckets
, 100*numbuckets
/vm_page_bucket_count
);
193 printf("Number unused buckets: %d = %d%%\n",
194 vm_page_bucket_count
- numbuckets
,
195 100*(vm_page_bucket_count
-numbuckets
)/vm_page_bucket_count
);
196 printf("Sum of bucket max depth: %d\n", highsum
);
197 printf("Average bucket depth: %d.%2d\n",
198 highsum
/vm_page_bucket_count
,
199 highsum%vm_page_bucket_count
);
200 printf("Maximum bucket depth: %d\n", maxdepth
);
202 #endif /* MACH_PAGE_HASH_STATS */
205 * The virtual page size is currently implemented as a runtime
206 * variable, but is constant once initialized using vm_set_page_size.
207 * This initialization must be done in the machine-dependent
208 * bootstrap sequence, before calling other machine-independent
211 * All references to the virtual page size outside this
212 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
215 vm_size_t page_size
= PAGE_SIZE
;
216 vm_size_t page_mask
= PAGE_MASK
;
217 int page_shift
= PAGE_SHIFT
;
220 * Resident page structures are initialized from
221 * a template (see vm_page_alloc).
223 * When adding a new field to the virtual memory
224 * object structure, be sure to add initialization
225 * (see vm_page_bootstrap).
227 struct vm_page vm_page_template
;
229 vm_page_t vm_pages
= VM_PAGE_NULL
;
230 unsigned int vm_pages_count
= 0;
231 ppnum_t vm_page_lowest
= 0;
234 * Resident pages that represent real memory
235 * are allocated from a set of free lists,
238 unsigned int vm_colors
;
239 unsigned int vm_color_mask
; /* mask is == (vm_colors-1) */
240 unsigned int vm_cache_geometry_colors
= 0; /* set by hw dependent code during startup */
241 queue_head_t vm_page_queue_free
[MAX_COLORS
];
242 unsigned int vm_page_free_wanted
;
243 unsigned int vm_page_free_wanted_privileged
;
244 unsigned int vm_page_free_count
;
245 unsigned int vm_page_fictitious_count
;
247 unsigned int vm_page_free_count_minimum
; /* debugging */
250 * Occasionally, the virtual memory system uses
251 * resident page structures that do not refer to
252 * real pages, for example to leave a page with
253 * important state information in the VP table.
255 * These page structures are allocated the way
256 * most other kernel structures are.
259 vm_locks_array_t vm_page_locks
;
260 decl_lck_mtx_data(,vm_page_alloc_lock
)
261 lck_mtx_ext_t vm_page_alloc_lock_ext
;
263 unsigned int io_throttle_zero_fill
;
265 unsigned int vm_page_local_q_count
= 0;
266 unsigned int vm_page_local_q_soft_limit
= 250;
267 unsigned int vm_page_local_q_hard_limit
= 500;
268 struct vplq
*vm_page_local_q
= NULL
;
270 /* N.B. Guard and fictitious pages must not
271 * be assigned a zero phys_page value.
274 * Fictitious pages don't have a physical address,
275 * but we must initialize phys_page to something.
276 * For debugging, this should be a strange value
277 * that the pmap module can recognize in assertions.
279 ppnum_t vm_page_fictitious_addr
= (ppnum_t
) -1;
282 * Guard pages are not accessible so they don't
283 * need a physical address, but we need to enter
285 * Let's make it recognizable and make sure that
286 * we don't use a real physical page with that
289 ppnum_t vm_page_guard_addr
= (ppnum_t
) -2;
292 * Resident page structures are also chained on
293 * queues that are used by the page replacement
294 * system (pageout daemon). These queues are
295 * defined here, but are shared by the pageout
296 * module. The inactive queue is broken into
297 * file backed and anonymous for convenience as the
298 * pageout daemon often assignes a higher
299 * importance to anonymous pages (less likely to pick)
301 queue_head_t vm_page_queue_active
;
302 queue_head_t vm_page_queue_inactive
;
303 queue_head_t vm_page_queue_anonymous
; /* inactive memory queue for anonymous pages */
304 queue_head_t vm_page_queue_throttled
;
306 unsigned int vm_page_active_count
;
307 unsigned int vm_page_inactive_count
;
308 unsigned int vm_page_anonymous_count
;
309 unsigned int vm_page_throttled_count
;
310 unsigned int vm_page_speculative_count
;
311 unsigned int vm_page_wire_count
;
312 unsigned int vm_page_wire_count_initial
;
313 unsigned int vm_page_gobble_count
= 0;
314 unsigned int vm_page_wire_count_warning
= 0;
315 unsigned int vm_page_gobble_count_warning
= 0;
317 unsigned int vm_page_purgeable_count
= 0; /* # of pages purgeable now */
318 unsigned int vm_page_purgeable_wired_count
= 0; /* # of purgeable pages that are wired now */
319 uint64_t vm_page_purged_count
= 0; /* total count of purged pages */
321 unsigned int vm_page_external_count
= 0;
322 unsigned int vm_page_internal_count
= 0;
323 unsigned int vm_page_pageable_external_count
= 0;
324 unsigned int vm_page_pageable_internal_count
= 0;
326 #if DEVELOPMENT || DEBUG
327 unsigned int vm_page_speculative_recreated
= 0;
328 unsigned int vm_page_speculative_created
= 0;
329 unsigned int vm_page_speculative_used
= 0;
332 queue_head_t vm_page_queue_cleaned
;
334 unsigned int vm_page_cleaned_count
= 0;
335 unsigned int vm_pageout_enqueued_cleaned
= 0;
337 uint64_t max_valid_dma_address
= 0xffffffffffffffffULL
;
338 ppnum_t max_valid_low_ppnum
= 0xffffffff;
342 * Several page replacement parameters are also
343 * shared with this module, so that page allocation
344 * (done here in vm_page_alloc) can trigger the
347 unsigned int vm_page_free_target
= 0;
348 unsigned int vm_page_free_min
= 0;
349 unsigned int vm_page_throttle_limit
= 0;
350 uint32_t vm_page_creation_throttle
= 0;
351 unsigned int vm_page_inactive_target
= 0;
352 unsigned int vm_page_anonymous_min
= 0;
353 unsigned int vm_page_inactive_min
= 0;
354 unsigned int vm_page_free_reserved
= 0;
355 unsigned int vm_page_throttle_count
= 0;
359 * The VM system has a couple of heuristics for deciding
360 * that pages are "uninteresting" and should be placed
361 * on the inactive queue as likely candidates for replacement.
362 * These variables let the heuristics be controlled at run-time
363 * to make experimentation easier.
366 boolean_t vm_page_deactivate_hint
= TRUE
;
368 struct vm_page_stats_reusable vm_page_stats_reusable
;
373 * Sets the page size, perhaps based upon the memory
374 * size. Must be called before any use of page-size
375 * dependent functions.
377 * Sets page_shift and page_mask from page_size.
380 vm_set_page_size(void)
382 page_mask
= page_size
- 1;
384 if ((page_mask
& page_size
) != 0)
385 panic("vm_set_page_size: page size not a power of two");
387 for (page_shift
= 0; ; page_shift
++)
388 if ((1U << page_shift
) == page_size
)
393 /* Called once during statup, once the cache geometry is known.
396 vm_page_set_colors( void )
398 unsigned int n
, override
;
400 if ( PE_parse_boot_argn("colors", &override
, sizeof (override
)) ) /* colors specified as a boot-arg? */
402 else if ( vm_cache_geometry_colors
) /* do we know what the cache geometry is? */
403 n
= vm_cache_geometry_colors
;
404 else n
= DEFAULT_COLORS
; /* use default if all else fails */
408 if ( n
> MAX_COLORS
)
411 /* the count must be a power of 2 */
412 if ( ( n
& (n
- 1)) != 0 )
413 panic("vm_page_set_colors");
416 vm_color_mask
= n
- 1;
420 lck_grp_t vm_page_lck_grp_free
;
421 lck_grp_t vm_page_lck_grp_queue
;
422 lck_grp_t vm_page_lck_grp_local
;
423 lck_grp_t vm_page_lck_grp_purge
;
424 lck_grp_t vm_page_lck_grp_alloc
;
425 lck_grp_t vm_page_lck_grp_bucket
;
426 lck_grp_attr_t vm_page_lck_grp_attr
;
427 lck_attr_t vm_page_lck_attr
;
430 __private_extern__
void
431 vm_page_init_lck_grp(void)
434 * initialze the vm_page lock world
436 lck_grp_attr_setdefault(&vm_page_lck_grp_attr
);
437 lck_grp_init(&vm_page_lck_grp_free
, "vm_page_free", &vm_page_lck_grp_attr
);
438 lck_grp_init(&vm_page_lck_grp_queue
, "vm_page_queue", &vm_page_lck_grp_attr
);
439 lck_grp_init(&vm_page_lck_grp_local
, "vm_page_queue_local", &vm_page_lck_grp_attr
);
440 lck_grp_init(&vm_page_lck_grp_purge
, "vm_page_purge", &vm_page_lck_grp_attr
);
441 lck_grp_init(&vm_page_lck_grp_alloc
, "vm_page_alloc", &vm_page_lck_grp_attr
);
442 lck_grp_init(&vm_page_lck_grp_bucket
, "vm_page_bucket", &vm_page_lck_grp_attr
);
443 lck_attr_setdefault(&vm_page_lck_attr
);
444 lck_mtx_init_ext(&vm_page_alloc_lock
, &vm_page_alloc_lock_ext
, &vm_page_lck_grp_alloc
, &vm_page_lck_attr
);
446 vm_compressor_init_locks();
450 vm_page_init_local_q()
452 unsigned int num_cpus
;
454 struct vplq
*t_local_q
;
456 num_cpus
= ml_get_max_cpus();
459 * no point in this for a uni-processor system
462 t_local_q
= (struct vplq
*)kalloc(num_cpus
* sizeof(struct vplq
));
464 for (i
= 0; i
< num_cpus
; i
++) {
467 lq
= &t_local_q
[i
].vpl_un
.vpl
;
468 VPL_LOCK_INIT(lq
, &vm_page_lck_grp_local
, &vm_page_lck_attr
);
469 queue_init(&lq
->vpl_queue
);
471 lq
->vpl_internal_count
= 0;
472 lq
->vpl_external_count
= 0;
474 vm_page_local_q_count
= num_cpus
;
476 vm_page_local_q
= (struct vplq
*)t_local_q
;
484 * Initializes the resident memory module.
486 * Allocates memory for the page cells, and
487 * for the object/offset-to-page hash table headers.
488 * Each page cell is initialized and placed on the free list.
489 * Returns the range of available kernel virtual memory.
497 register vm_page_t m
;
504 * Initialize the vm_page template.
507 m
= &vm_page_template
;
508 bzero(m
, sizeof (*m
));
510 m
->pageq
.next
= NULL
;
511 m
->pageq
.prev
= NULL
;
512 m
->listq
.next
= NULL
;
513 m
->listq
.prev
= NULL
;
514 m
->next
= VM_PAGE_NULL
;
516 m
->object
= VM_OBJECT_NULL
; /* reset later */
517 m
->offset
= (vm_object_offset_t
) -1; /* reset later */
523 m
->pageout_queue
= FALSE
;
524 m
->speculative
= FALSE
;
527 m
->reference
= FALSE
;
530 m
->throttled
= FALSE
;
531 m
->__unused_pageq_bits
= 0;
533 m
->phys_page
= 0; /* reset later */
538 m
->fictitious
= FALSE
;
547 m
->clustered
= FALSE
;
548 m
->overwriting
= FALSE
;
551 m
->encrypted
= FALSE
;
552 m
->encrypted_cleaning
= FALSE
;
553 m
->cs_validated
= FALSE
;
554 m
->cs_tainted
= FALSE
;
558 m
->was_dirty
= FALSE
;
560 m
->compressor
= FALSE
;
561 m
->__unused_object_bits
= 0;
564 * Initialize the page queues.
566 vm_page_init_lck_grp();
568 lck_mtx_init_ext(&vm_page_queue_free_lock
, &vm_page_queue_free_lock_ext
, &vm_page_lck_grp_free
, &vm_page_lck_attr
);
569 lck_mtx_init_ext(&vm_page_queue_lock
, &vm_page_queue_lock_ext
, &vm_page_lck_grp_queue
, &vm_page_lck_attr
);
570 lck_mtx_init_ext(&vm_purgeable_queue_lock
, &vm_purgeable_queue_lock_ext
, &vm_page_lck_grp_purge
, &vm_page_lck_attr
);
572 for (i
= 0; i
< PURGEABLE_Q_TYPE_MAX
; i
++) {
575 purgeable_queues
[i
].token_q_head
= 0;
576 purgeable_queues
[i
].token_q_tail
= 0;
577 for (group
= 0; group
< NUM_VOLATILE_GROUPS
; group
++)
578 queue_init(&purgeable_queues
[i
].objq
[group
]);
580 purgeable_queues
[i
].type
= i
;
581 purgeable_queues
[i
].new_pages
= 0;
583 purgeable_queues
[i
].debug_count_tokens
= 0;
584 purgeable_queues
[i
].debug_count_objects
= 0;
588 for (i
= 0; i
< MAX_COLORS
; i
++ )
589 queue_init(&vm_page_queue_free
[i
]);
591 queue_init(&vm_lopage_queue_free
);
592 queue_init(&vm_page_queue_active
);
593 queue_init(&vm_page_queue_inactive
);
594 queue_init(&vm_page_queue_cleaned
);
595 queue_init(&vm_page_queue_throttled
);
596 queue_init(&vm_page_queue_anonymous
);
598 for ( i
= 0; i
<= VM_PAGE_MAX_SPECULATIVE_AGE_Q
; i
++ ) {
599 queue_init(&vm_page_queue_speculative
[i
].age_q
);
601 vm_page_queue_speculative
[i
].age_ts
.tv_sec
= 0;
602 vm_page_queue_speculative
[i
].age_ts
.tv_nsec
= 0;
604 vm_page_free_wanted
= 0;
605 vm_page_free_wanted_privileged
= 0;
607 vm_page_set_colors();
611 * Steal memory for the map and zone subsystems.
614 vm_map_steal_memory();
617 * Allocate (and initialize) the virtual-to-physical
618 * table hash buckets.
620 * The number of buckets should be a power of two to
621 * get a good hash function. The following computation
622 * chooses the first power of two that is greater
623 * than the number of physical pages in the system.
626 if (vm_page_bucket_count
== 0) {
627 unsigned int npages
= pmap_free_pages();
629 vm_page_bucket_count
= 1;
630 while (vm_page_bucket_count
< npages
)
631 vm_page_bucket_count
<<= 1;
633 vm_page_bucket_lock_count
= (vm_page_bucket_count
+ BUCKETS_PER_LOCK
- 1) / BUCKETS_PER_LOCK
;
635 vm_page_hash_mask
= vm_page_bucket_count
- 1;
638 * Calculate object shift value for hashing algorithm:
639 * O = log2(sizeof(struct vm_object))
640 * B = log2(vm_page_bucket_count)
641 * hash shifts the object left by
644 size
= vm_page_bucket_count
;
645 for (log1
= 0; size
> 1; log1
++)
647 size
= sizeof(struct vm_object
);
648 for (log2
= 0; size
> 1; log2
++)
650 vm_page_hash_shift
= log1
/2 - log2
+ 1;
652 vm_page_bucket_hash
= 1 << ((log1
+ 1) >> 1); /* Get (ceiling of sqrt of table size) */
653 vm_page_bucket_hash
|= 1 << ((log1
+ 1) >> 2); /* Get (ceiling of quadroot of table size) */
654 vm_page_bucket_hash
|= 1; /* Set bit and add 1 - always must be 1 to insure unique series */
656 if (vm_page_hash_mask
& vm_page_bucket_count
)
657 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
659 vm_page_buckets
= (vm_page_bucket_t
*)
660 pmap_steal_memory(vm_page_bucket_count
*
661 sizeof(vm_page_bucket_t
));
663 vm_page_bucket_locks
= (lck_spin_t
*)
664 pmap_steal_memory(vm_page_bucket_lock_count
*
667 for (i
= 0; i
< vm_page_bucket_count
; i
++) {
668 register vm_page_bucket_t
*bucket
= &vm_page_buckets
[i
];
670 bucket
->pages
= VM_PAGE_NULL
;
671 #if MACH_PAGE_HASH_STATS
672 bucket
->cur_count
= 0;
673 bucket
->hi_count
= 0;
674 #endif /* MACH_PAGE_HASH_STATS */
677 for (i
= 0; i
< vm_page_bucket_lock_count
; i
++)
678 lck_spin_init(&vm_page_bucket_locks
[i
], &vm_page_lck_grp_bucket
, &vm_page_lck_attr
);
681 * Machine-dependent code allocates the resident page table.
682 * It uses vm_page_init to initialize the page frames.
683 * The code also returns to us the virtual space available
684 * to the kernel. We don't trust the pmap module
685 * to get the alignment right.
688 pmap_startup(&virtual_space_start
, &virtual_space_end
);
689 virtual_space_start
= round_page(virtual_space_start
);
690 virtual_space_end
= trunc_page(virtual_space_end
);
692 *startp
= virtual_space_start
;
693 *endp
= virtual_space_end
;
696 * Compute the initial "wire" count.
697 * Up until now, the pages which have been set aside are not under
698 * the VM system's control, so although they aren't explicitly
699 * wired, they nonetheless can't be moved. At this moment,
700 * all VM managed pages are "free", courtesy of pmap_startup.
702 assert((unsigned int) atop_64(max_mem
) == atop_64(max_mem
));
703 vm_page_wire_count
= ((unsigned int) atop_64(max_mem
)) - vm_page_free_count
- vm_lopage_free_count
; /* initial value */
704 vm_page_wire_count_initial
= vm_page_wire_count
;
705 vm_page_free_count_minimum
= vm_page_free_count
;
707 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
708 vm_page_free_count
, vm_page_wire_count
);
710 simple_lock_init(&vm_paging_lock
, 0);
713 #ifndef MACHINE_PAGES
715 * We implement pmap_steal_memory and pmap_startup with the help
716 * of two simpler functions, pmap_virtual_space and pmap_next_page.
723 vm_offset_t addr
, vaddr
;
727 * We round the size to a round multiple.
730 size
= (size
+ sizeof (void *) - 1) &~ (sizeof (void *) - 1);
733 * If this is the first call to pmap_steal_memory,
734 * we have to initialize ourself.
737 if (virtual_space_start
== virtual_space_end
) {
738 pmap_virtual_space(&virtual_space_start
, &virtual_space_end
);
741 * The initial values must be aligned properly, and
742 * we don't trust the pmap module to do it right.
745 virtual_space_start
= round_page(virtual_space_start
);
746 virtual_space_end
= trunc_page(virtual_space_end
);
750 * Allocate virtual memory for this request.
753 addr
= virtual_space_start
;
754 virtual_space_start
+= size
;
756 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
759 * Allocate and map physical pages to back new virtual pages.
762 for (vaddr
= round_page(addr
);
764 vaddr
+= PAGE_SIZE
) {
766 if (!pmap_next_page_hi(&phys_page
))
767 panic("pmap_steal_memory");
770 * XXX Logically, these mappings should be wired,
771 * but some pmap modules barf if they are.
773 #if defined(__LP64__)
774 pmap_pre_expand(kernel_pmap
, vaddr
);
777 pmap_enter(kernel_pmap
, vaddr
, phys_page
,
778 VM_PROT_READ
|VM_PROT_WRITE
, VM_PROT_NONE
,
779 VM_WIMG_USE_DEFAULT
, FALSE
);
781 * Account for newly stolen memory
783 vm_page_wire_count
++;
787 return (void *) addr
;
795 unsigned int i
, npages
, pages_initialized
, fill
, fillval
;
800 * We calculate how many page frames we will have
801 * and then allocate the page structures in one chunk.
804 tmpaddr
= (addr64_t
)pmap_free_pages() * (addr64_t
)PAGE_SIZE
; /* Get the amount of memory left */
805 tmpaddr
= tmpaddr
+ (addr64_t
)(round_page(virtual_space_start
) - virtual_space_start
); /* Account for any slop */
806 npages
= (unsigned int)(tmpaddr
/ (addr64_t
)(PAGE_SIZE
+ sizeof(*vm_pages
))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
808 vm_pages
= (vm_page_t
) pmap_steal_memory(npages
* sizeof *vm_pages
);
811 * Initialize the page frames.
813 for (i
= 0, pages_initialized
= 0; i
< npages
; i
++) {
814 if (!pmap_next_page(&phys_page
))
816 if (pages_initialized
== 0 || phys_page
< vm_page_lowest
)
817 vm_page_lowest
= phys_page
;
819 vm_page_init(&vm_pages
[i
], phys_page
, FALSE
);
823 vm_pages_count
= pages_initialized
;
826 * Check if we want to initialize pages to a known value
828 fill
= 0; /* Assume no fill */
829 if (PE_parse_boot_argn("fill", &fillval
, sizeof (fillval
))) fill
= 1; /* Set fill */
831 /* This slows down booting the DEBUG kernel, particularly on
832 * large memory systems, but is worthwhile in deterministically
833 * trapping uninitialized memory usage.
837 fillval
= 0xDEB8F177;
841 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval
);
842 // -debug code remove
843 if (2 == vm_himemory_mode
) {
844 // free low -> high so high is preferred
845 for (i
= 1; i
<= pages_initialized
; i
++) {
846 if(fill
) fillPage(vm_pages
[i
- 1].phys_page
, fillval
); /* Fill the page with a know value if requested at boot */
847 vm_page_release(&vm_pages
[i
- 1]);
851 // debug code remove-
854 * Release pages in reverse order so that physical pages
855 * initially get allocated in ascending addresses. This keeps
856 * the devices (which must address physical memory) happy if
857 * they require several consecutive pages.
859 for (i
= pages_initialized
; i
> 0; i
--) {
860 if(fill
) fillPage(vm_pages
[i
- 1].phys_page
, fillval
); /* Fill the page with a know value if requested at boot */
861 vm_page_release(&vm_pages
[i
- 1]);
866 vm_page_t xx
, xxo
, xxl
;
869 j
= 0; /* (BRINGUP) */
872 for( i
= 0; i
< vm_colors
; i
++ ) {
873 queue_iterate(&vm_page_queue_free
[i
],
876 pageq
) { /* BRINGUP */
878 if(j
> vm_page_free_count
) { /* (BRINGUP) */
879 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx
, xxl
);
882 l
= vm_page_free_count
- j
; /* (BRINGUP) */
883 k
= 0; /* (BRINGUP) */
885 if(((j
- 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j
, vm_page_free_count
);
887 for(xxo
= xx
->pageq
.next
; xxo
!= &vm_page_queue_free
[i
]; xxo
= xxo
->pageq
.next
) { /* (BRINGUP) */
889 if(k
> l
) panic("pmap_startup: too many in secondary check %d %d\n", k
, l
);
890 if((xx
->phys_page
& 0xFFFFFFFF) == (xxo
->phys_page
& 0xFFFFFFFF)) { /* (BRINGUP) */
891 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx
, xxo
);
899 if(j
!= vm_page_free_count
) { /* (BRINGUP) */
900 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j
, vm_page_free_count
);
907 * We have to re-align virtual_space_start,
908 * because pmap_steal_memory has been using it.
911 virtual_space_start
= round_page(virtual_space_start
);
913 *startp
= virtual_space_start
;
914 *endp
= virtual_space_end
;
916 #endif /* MACHINE_PAGES */
919 * Routine: vm_page_module_init
921 * Second initialization pass, to be done after
922 * the basic VM system is ready.
925 vm_page_module_init(void)
927 vm_page_zone
= zinit((vm_size_t
) sizeof(struct vm_page
),
928 0, PAGE_SIZE
, "vm pages");
931 zone_debug_disable(vm_page_zone
);
932 #endif /* ZONE_DEBUG */
934 zone_change(vm_page_zone
, Z_CALLERACCT
, FALSE
);
935 zone_change(vm_page_zone
, Z_EXPAND
, FALSE
);
936 zone_change(vm_page_zone
, Z_EXHAUST
, TRUE
);
937 zone_change(vm_page_zone
, Z_FOREIGN
, TRUE
);
938 zone_change(vm_page_zone
, Z_GZALLOC_EXEMPT
, TRUE
);
940 * Adjust zone statistics to account for the real pages allocated
941 * in vm_page_create(). [Q: is this really what we want?]
943 vm_page_zone
->count
+= vm_page_pages
;
944 vm_page_zone
->sum_count
+= vm_page_pages
;
945 vm_page_zone
->cur_size
+= vm_page_pages
* vm_page_zone
->elem_size
;
949 * Routine: vm_page_create
951 * After the VM system is up, machine-dependent code
952 * may stumble across more physical memory. For example,
953 * memory that it was reserving for a frame buffer.
954 * vm_page_create turns this memory into available pages.
965 for (phys_page
= start
;
968 while ((m
= (vm_page_t
) vm_page_grab_fictitious_common(phys_page
))
970 vm_page_more_fictitious();
972 m
->fictitious
= FALSE
;
973 pmap_clear_noencrypt(phys_page
);
983 * Distributes the object/offset key pair among hash buckets.
985 * NOTE: The bucket count must be a power of 2
987 #define vm_page_hash(object, offset) (\
988 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
993 * vm_page_insert: [ internal use only ]
995 * Inserts the given mem entry into the object/object-page
996 * table and object list.
998 * The object must be locked.
1004 vm_object_offset_t offset
)
1006 vm_page_insert_internal(mem
, object
, offset
, FALSE
, TRUE
, FALSE
);
1010 vm_page_insert_internal(
1013 vm_object_offset_t offset
,
1014 boolean_t queues_lock_held
,
1015 boolean_t insert_in_hash
,
1016 boolean_t batch_pmap_op
)
1018 vm_page_bucket_t
*bucket
;
1019 lck_spin_t
*bucket_lock
;
1023 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1024 object
, offset
, mem
, 0,0);
1027 * we may not hold the page queue lock
1028 * so this check isn't safe to make
1033 assert(page_aligned(offset
));
1035 if (object
== vm_submap_object
) {
1036 /* the vm_submap_object is only a placeholder for submaps */
1037 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset
);
1040 vm_object_lock_assert_exclusive(object
);
1042 lck_mtx_assert(&vm_page_queue_lock
,
1043 queues_lock_held
? LCK_MTX_ASSERT_OWNED
1044 : LCK_MTX_ASSERT_NOTOWNED
);
1047 if (insert_in_hash
== TRUE
) {
1049 if (mem
->tabled
|| mem
->object
!= VM_OBJECT_NULL
)
1050 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1051 "already in (obj=%p,off=0x%llx)",
1052 mem
, object
, offset
, mem
->object
, mem
->offset
);
1054 assert(!object
->internal
|| offset
< object
->vo_size
);
1056 /* only insert "pageout" pages into "pageout" objects,
1057 * and normal pages into normal objects */
1058 assert(object
->pageout
== mem
->pageout
);
1060 assert(vm_page_lookup(object
, offset
) == VM_PAGE_NULL
);
1063 * Record the object/offset pair in this page
1066 mem
->object
= object
;
1067 mem
->offset
= offset
;
1070 * Insert it into the object_object/offset hash table
1072 hash_id
= vm_page_hash(object
, offset
);
1073 bucket
= &vm_page_buckets
[hash_id
];
1074 bucket_lock
= &vm_page_bucket_locks
[hash_id
/ BUCKETS_PER_LOCK
];
1076 lck_spin_lock(bucket_lock
);
1078 mem
->next
= bucket
->pages
;
1079 bucket
->pages
= mem
;
1080 #if MACH_PAGE_HASH_STATS
1081 if (++bucket
->cur_count
> bucket
->hi_count
)
1082 bucket
->hi_count
= bucket
->cur_count
;
1083 #endif /* MACH_PAGE_HASH_STATS */
1085 lck_spin_unlock(bucket_lock
);
1089 unsigned int cache_attr
;
1091 cache_attr
= object
->wimg_bits
& VM_WIMG_MASK
;
1093 if (cache_attr
!= VM_WIMG_USE_DEFAULT
) {
1094 PMAP_SET_CACHE_ATTR(mem
, object
, cache_attr
, batch_pmap_op
);
1098 * Now link into the object's list of backed pages.
1100 VM_PAGE_INSERT(mem
, object
);
1104 * Show that the object has one more resident page.
1107 object
->resident_page_count
++;
1108 if (VM_PAGE_WIRED(mem
)) {
1109 object
->wired_page_count
++;
1111 assert(object
->resident_page_count
>= object
->wired_page_count
);
1113 if (object
->internal
) {
1114 OSAddAtomic(1, &vm_page_internal_count
);
1116 OSAddAtomic(1, &vm_page_external_count
);
1120 * It wouldn't make sense to insert a "reusable" page in
1121 * an object (the page would have been marked "reusable" only
1122 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1123 * in the object at that time).
1124 * But a page could be inserted in a "all_reusable" object, if
1125 * something faults it in (a vm_read() from another task or a
1126 * "use-after-free" issue in user space, for example). It can
1127 * also happen if we're relocating a page from that object to
1128 * a different physical page during a physically-contiguous
1131 assert(!mem
->reusable
);
1132 if (mem
->object
->all_reusable
) {
1133 OSAddAtomic(+1, &vm_page_stats_reusable
.reusable_count
);
1136 if (object
->purgable
== VM_PURGABLE_VOLATILE
) {
1137 if (VM_PAGE_WIRED(mem
)) {
1138 OSAddAtomic(1, &vm_page_purgeable_wired_count
);
1140 OSAddAtomic(1, &vm_page_purgeable_count
);
1142 } else if (object
->purgable
== VM_PURGABLE_EMPTY
&&
1145 * This page belongs to a purged VM object but hasn't
1146 * been purged (because it was "busy").
1147 * It's in the "throttled" queue and hence not
1148 * visible to vm_pageout_scan(). Move it to a pageable
1149 * queue, so that it can eventually be reclaimed, instead
1150 * of lingering in the "empty" object.
1152 if (queues_lock_held
== FALSE
)
1153 vm_page_lockspin_queues();
1154 vm_page_deactivate(mem
);
1155 if (queues_lock_held
== FALSE
)
1156 vm_page_unlock_queues();
1163 * Exactly like vm_page_insert, except that we first
1164 * remove any existing page at the given offset in object.
1166 * The object must be locked.
1170 register vm_page_t mem
,
1171 register vm_object_t object
,
1172 register vm_object_offset_t offset
)
1174 vm_page_bucket_t
*bucket
;
1175 vm_page_t found_m
= VM_PAGE_NULL
;
1176 lck_spin_t
*bucket_lock
;
1181 * we don't hold the page queue lock
1182 * so this check isn't safe to make
1186 vm_object_lock_assert_exclusive(object
);
1188 if (mem
->tabled
|| mem
->object
!= VM_OBJECT_NULL
)
1189 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1190 "already in (obj=%p,off=0x%llx)",
1191 mem
, object
, offset
, mem
->object
, mem
->offset
);
1192 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_NOTOWNED
);
1195 * Record the object/offset pair in this page
1198 mem
->object
= object
;
1199 mem
->offset
= offset
;
1202 * Insert it into the object_object/offset hash table,
1203 * replacing any page that might have been there.
1206 hash_id
= vm_page_hash(object
, offset
);
1207 bucket
= &vm_page_buckets
[hash_id
];
1208 bucket_lock
= &vm_page_bucket_locks
[hash_id
/ BUCKETS_PER_LOCK
];
1210 lck_spin_lock(bucket_lock
);
1212 if (bucket
->pages
) {
1213 vm_page_t
*mp
= &bucket
->pages
;
1217 if (m
->object
== object
&& m
->offset
== offset
) {
1219 * Remove old page from hash list
1227 } while ((m
= *mp
));
1229 mem
->next
= bucket
->pages
;
1231 mem
->next
= VM_PAGE_NULL
;
1234 * insert new page at head of hash list
1236 bucket
->pages
= mem
;
1238 lck_spin_unlock(bucket_lock
);
1242 * there was already a page at the specified
1243 * offset for this object... remove it from
1244 * the object and free it back to the free list
1246 vm_page_free_unlocked(found_m
, FALSE
);
1248 vm_page_insert_internal(mem
, object
, offset
, FALSE
, FALSE
, FALSE
);
1252 * vm_page_remove: [ internal use only ]
1254 * Removes the given mem entry from the object/offset-page
1255 * table and the object page list.
1257 * The object must be locked.
1263 boolean_t remove_from_hash
)
1265 vm_page_bucket_t
*bucket
;
1267 lck_spin_t
*bucket_lock
;
1271 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1272 mem
->object
, mem
->offset
,
1275 vm_object_lock_assert_exclusive(mem
->object
);
1276 assert(mem
->tabled
);
1277 assert(!mem
->cleaning
);
1278 assert(!mem
->laundry
);
1281 * we don't hold the page queue lock
1282 * so this check isn't safe to make
1286 if (remove_from_hash
== TRUE
) {
1288 * Remove from the object_object/offset hash table
1290 hash_id
= vm_page_hash(mem
->object
, mem
->offset
);
1291 bucket
= &vm_page_buckets
[hash_id
];
1292 bucket_lock
= &vm_page_bucket_locks
[hash_id
/ BUCKETS_PER_LOCK
];
1294 lck_spin_lock(bucket_lock
);
1296 if ((this = bucket
->pages
) == mem
) {
1297 /* optimize for common case */
1299 bucket
->pages
= mem
->next
;
1303 for (prev
= &this->next
;
1304 (this = *prev
) != mem
;
1309 #if MACH_PAGE_HASH_STATS
1310 bucket
->cur_count
--;
1311 #endif /* MACH_PAGE_HASH_STATS */
1313 lck_spin_unlock(bucket_lock
);
1316 * Now remove from the object's list of backed pages.
1319 VM_PAGE_REMOVE(mem
);
1322 * And show that the object has one fewer resident
1326 assert(mem
->object
->resident_page_count
> 0);
1327 mem
->object
->resident_page_count
--;
1329 if (mem
->object
->internal
) {
1330 assert(vm_page_internal_count
);
1331 OSAddAtomic(-1, &vm_page_internal_count
);
1333 assert(vm_page_external_count
);
1334 OSAddAtomic(-1, &vm_page_external_count
);
1336 if (!mem
->object
->internal
&& (mem
->object
->objq
.next
|| mem
->object
->objq
.prev
)) {
1337 if (mem
->object
->resident_page_count
== 0)
1338 vm_object_cache_remove(mem
->object
);
1341 if (VM_PAGE_WIRED(mem
)) {
1342 assert(mem
->object
->wired_page_count
> 0);
1343 mem
->object
->wired_page_count
--;
1345 assert(mem
->object
->resident_page_count
>=
1346 mem
->object
->wired_page_count
);
1347 if (mem
->reusable
) {
1348 assert(mem
->object
->reusable_page_count
> 0);
1349 mem
->object
->reusable_page_count
--;
1350 assert(mem
->object
->reusable_page_count
<=
1351 mem
->object
->resident_page_count
);
1352 mem
->reusable
= FALSE
;
1353 OSAddAtomic(-1, &vm_page_stats_reusable
.reusable_count
);
1354 vm_page_stats_reusable
.reused_remove
++;
1355 } else if (mem
->object
->all_reusable
) {
1356 OSAddAtomic(-1, &vm_page_stats_reusable
.reusable_count
);
1357 vm_page_stats_reusable
.reused_remove
++;
1360 if (mem
->object
->purgable
== VM_PURGABLE_VOLATILE
) {
1361 if (VM_PAGE_WIRED(mem
)) {
1362 assert(vm_page_purgeable_wired_count
> 0);
1363 OSAddAtomic(-1, &vm_page_purgeable_wired_count
);
1365 assert(vm_page_purgeable_count
> 0);
1366 OSAddAtomic(-1, &vm_page_purgeable_count
);
1369 if (mem
->object
->set_cache_attr
== TRUE
)
1370 pmap_set_cache_attributes(mem
->phys_page
, 0);
1372 mem
->tabled
= FALSE
;
1373 mem
->object
= VM_OBJECT_NULL
;
1374 mem
->offset
= (vm_object_offset_t
) -1;
1381 * Returns the page associated with the object/offset
1382 * pair specified; if none is found, VM_PAGE_NULL is returned.
1384 * The object must be locked. No side effects.
1387 unsigned long vm_page_lookup_hint
= 0;
1388 unsigned long vm_page_lookup_hint_next
= 0;
1389 unsigned long vm_page_lookup_hint_prev
= 0;
1390 unsigned long vm_page_lookup_hint_miss
= 0;
1391 unsigned long vm_page_lookup_bucket_NULL
= 0;
1392 unsigned long vm_page_lookup_miss
= 0;
1398 vm_object_offset_t offset
)
1401 vm_page_bucket_t
*bucket
;
1403 lck_spin_t
*bucket_lock
;
1406 vm_object_lock_assert_held(object
);
1407 mem
= object
->memq_hint
;
1409 if (mem
!= VM_PAGE_NULL
) {
1410 assert(mem
->object
== object
);
1412 if (mem
->offset
== offset
) {
1413 vm_page_lookup_hint
++;
1416 qe
= queue_next(&mem
->listq
);
1418 if (! queue_end(&object
->memq
, qe
)) {
1419 vm_page_t next_page
;
1421 next_page
= (vm_page_t
) qe
;
1422 assert(next_page
->object
== object
);
1424 if (next_page
->offset
== offset
) {
1425 vm_page_lookup_hint_next
++;
1426 object
->memq_hint
= next_page
; /* new hint */
1430 qe
= queue_prev(&mem
->listq
);
1432 if (! queue_end(&object
->memq
, qe
)) {
1433 vm_page_t prev_page
;
1435 prev_page
= (vm_page_t
) qe
;
1436 assert(prev_page
->object
== object
);
1438 if (prev_page
->offset
== offset
) {
1439 vm_page_lookup_hint_prev
++;
1440 object
->memq_hint
= prev_page
; /* new hint */
1446 * Search the hash table for this object/offset pair
1448 hash_id
= vm_page_hash(object
, offset
);
1449 bucket
= &vm_page_buckets
[hash_id
];
1452 * since we hold the object lock, we are guaranteed that no
1453 * new pages can be inserted into this object... this in turn
1454 * guarantess that the page we're looking for can't exist
1455 * if the bucket it hashes to is currently NULL even when looked
1456 * at outside the scope of the hash bucket lock... this is a
1457 * really cheap optimiztion to avoid taking the lock
1459 if (bucket
->pages
== VM_PAGE_NULL
) {
1460 vm_page_lookup_bucket_NULL
++;
1462 return (VM_PAGE_NULL
);
1464 bucket_lock
= &vm_page_bucket_locks
[hash_id
/ BUCKETS_PER_LOCK
];
1466 lck_spin_lock(bucket_lock
);
1468 for (mem
= bucket
->pages
; mem
!= VM_PAGE_NULL
; mem
= mem
->next
) {
1471 * we don't hold the page queue lock
1472 * so this check isn't safe to make
1476 if ((mem
->object
== object
) && (mem
->offset
== offset
))
1479 lck_spin_unlock(bucket_lock
);
1481 if (mem
!= VM_PAGE_NULL
) {
1482 if (object
->memq_hint
!= VM_PAGE_NULL
) {
1483 vm_page_lookup_hint_miss
++;
1485 assert(mem
->object
== object
);
1486 object
->memq_hint
= mem
;
1488 vm_page_lookup_miss
++;
1497 * Move the given memory entry from its
1498 * current object to the specified target object/offset.
1500 * The object must be locked.
1504 register vm_page_t mem
,
1505 register vm_object_t new_object
,
1506 vm_object_offset_t new_offset
,
1507 boolean_t encrypted_ok
)
1509 boolean_t internal_to_external
, external_to_internal
;
1511 assert(mem
->object
!= new_object
);
1515 * The encryption key is based on the page's memory object
1516 * (aka "pager") and paging offset. Moving the page to
1517 * another VM object changes its "pager" and "paging_offset"
1518 * so it has to be decrypted first, or we would lose the key.
1520 * One exception is VM object collapsing, where we transfer pages
1521 * from one backing object to its parent object. This operation also
1522 * transfers the paging information, so the <pager,paging_offset> info
1523 * should remain consistent. The caller (vm_object_do_collapse())
1524 * sets "encrypted_ok" in this case.
1526 if (!encrypted_ok
&& mem
->encrypted
) {
1527 panic("vm_page_rename: page %p is encrypted\n", mem
);
1531 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1532 new_object
, new_offset
,
1536 * Changes to mem->object require the page lock because
1537 * the pageout daemon uses that lock to get the object.
1539 vm_page_lockspin_queues();
1541 internal_to_external
= FALSE
;
1542 external_to_internal
= FALSE
;
1546 * it's much easier to get the vm_page_pageable_xxx accounting correct
1547 * if we first move the page to the active queue... it's going to end
1548 * up there anyway, and we don't do vm_page_rename's frequently enough
1549 * for this to matter.
1551 VM_PAGE_QUEUES_REMOVE(mem
);
1552 vm_page_activate(mem
);
1554 if (mem
->active
|| mem
->inactive
|| mem
->speculative
) {
1555 if (mem
->object
->internal
&& !new_object
->internal
) {
1556 internal_to_external
= TRUE
;
1558 if (!mem
->object
->internal
&& new_object
->internal
) {
1559 external_to_internal
= TRUE
;
1563 vm_page_remove(mem
, TRUE
);
1564 vm_page_insert_internal(mem
, new_object
, new_offset
, TRUE
, TRUE
, FALSE
);
1566 if (internal_to_external
) {
1567 vm_page_pageable_internal_count
--;
1568 vm_page_pageable_external_count
++;
1569 } else if (external_to_internal
) {
1570 vm_page_pageable_external_count
--;
1571 vm_page_pageable_internal_count
++;
1574 vm_page_unlock_queues();
1580 * Initialize the fields in a new page.
1581 * This takes a structure with random values and initializes it
1582 * so that it can be given to vm_page_release or vm_page_insert.
1593 if ((phys_page
!= vm_page_fictitious_addr
) && (phys_page
!= vm_page_guard_addr
)) {
1594 if (!(pmap_valid_page(phys_page
))) {
1595 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page
);
1599 *mem
= vm_page_template
;
1600 mem
->phys_page
= phys_page
;
1603 * we're leaving this turned off for now... currently pages
1604 * come off the free list and are either immediately dirtied/referenced
1605 * due to zero-fill or COW faults, or are used to read or write files...
1606 * in the file I/O case, the UPL mechanism takes care of clearing
1607 * the state of the HW ref/mod bits in a somewhat fragile way.
1608 * Since we may change the way this works in the future (to toughen it up),
1609 * I'm leaving this as a reminder of where these bits could get cleared
1613 * make sure both the h/w referenced and modified bits are
1614 * clear at this point... we are especially dependent on
1615 * not finding a 'stale' h/w modified in a number of spots
1616 * once this page goes back into use
1618 pmap_clear_refmod(phys_page
, VM_MEM_MODIFIED
| VM_MEM_REFERENCED
);
1620 mem
->lopage
= lopage
;
1624 * vm_page_grab_fictitious:
1626 * Remove a fictitious page from the free list.
1627 * Returns VM_PAGE_NULL if there are no free pages.
1629 int c_vm_page_grab_fictitious
= 0;
1630 int c_vm_page_grab_fictitious_failed
= 0;
1631 int c_vm_page_release_fictitious
= 0;
1632 int c_vm_page_more_fictitious
= 0;
1635 vm_page_grab_fictitious_common(
1640 if ((m
= (vm_page_t
)zget(vm_page_zone
))) {
1642 vm_page_init(m
, phys_addr
, FALSE
);
1643 m
->fictitious
= TRUE
;
1645 c_vm_page_grab_fictitious
++;
1647 c_vm_page_grab_fictitious_failed
++;
1653 vm_page_grab_fictitious(void)
1655 return vm_page_grab_fictitious_common(vm_page_fictitious_addr
);
1659 vm_page_grab_guard(void)
1661 return vm_page_grab_fictitious_common(vm_page_guard_addr
);
1666 * vm_page_release_fictitious:
1668 * Release a fictitious page to the zone pool
1671 vm_page_release_fictitious(
1675 assert(m
->fictitious
);
1676 assert(m
->phys_page
== vm_page_fictitious_addr
||
1677 m
->phys_page
== vm_page_guard_addr
);
1679 c_vm_page_release_fictitious
++;
1681 zfree(vm_page_zone
, m
);
1685 * vm_page_more_fictitious:
1687 * Add more fictitious pages to the zone.
1688 * Allowed to block. This routine is way intimate
1689 * with the zones code, for several reasons:
1690 * 1. we need to carve some page structures out of physical
1691 * memory before zones work, so they _cannot_ come from
1693 * 2. the zone needs to be collectable in order to prevent
1694 * growth without bound. These structures are used by
1695 * the device pager (by the hundreds and thousands), as
1696 * private pages for pageout, and as blocking pages for
1697 * pagein. Temporary bursts in demand should not result in
1698 * permanent allocation of a resource.
1699 * 3. To smooth allocation humps, we allocate single pages
1700 * with kernel_memory_allocate(), and cram them into the
1704 void vm_page_more_fictitious(void)
1707 kern_return_t retval
;
1709 c_vm_page_more_fictitious
++;
1712 * Allocate a single page from the zone_map. Do not wait if no physical
1713 * pages are immediately available, and do not zero the space. We need
1714 * our own blocking lock here to prevent having multiple,
1715 * simultaneous requests from piling up on the zone_map lock. Exactly
1716 * one (of our) threads should be potentially waiting on the map lock.
1717 * If winner is not vm-privileged, then the page allocation will fail,
1718 * and it will temporarily block here in the vm_page_wait().
1720 lck_mtx_lock(&vm_page_alloc_lock
);
1722 * If another thread allocated space, just bail out now.
1724 if (zone_free_count(vm_page_zone
) > 5) {
1726 * The number "5" is a small number that is larger than the
1727 * number of fictitious pages that any single caller will
1728 * attempt to allocate. Otherwise, a thread will attempt to
1729 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1730 * release all of the resources and locks already acquired,
1731 * and then call this routine. This routine finds the pages
1732 * that the caller released, so fails to allocate new space.
1733 * The process repeats infinitely. The largest known number
1734 * of fictitious pages required in this manner is 2. 5 is
1735 * simply a somewhat larger number.
1737 lck_mtx_unlock(&vm_page_alloc_lock
);
1741 retval
= kernel_memory_allocate(zone_map
,
1742 &addr
, PAGE_SIZE
, VM_PROT_ALL
,
1743 KMA_KOBJECT
|KMA_NOPAGEWAIT
);
1744 if (retval
!= KERN_SUCCESS
) {
1746 * No page was available. Drop the
1747 * lock to give another thread a chance at it, and
1748 * wait for the pageout daemon to make progress.
1750 lck_mtx_unlock(&vm_page_alloc_lock
);
1751 vm_page_wait(THREAD_UNINT
);
1755 /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
1756 OSAddAtomic64(1, &(vm_page_zone
->page_count
));
1758 zcram(vm_page_zone
, addr
, PAGE_SIZE
);
1760 lck_mtx_unlock(&vm_page_alloc_lock
);
1767 * Return true if it is not likely that a non-vm_privileged thread
1768 * can get memory without blocking. Advisory only, since the
1769 * situation may change under us.
1774 /* No locking, at worst we will fib. */
1775 return( vm_page_free_count
<= vm_page_free_reserved
);
1781 * this is an interface to support bring-up of drivers
1782 * on platforms with physical memory > 4G...
1784 int vm_himemory_mode
= 0;
1788 * this interface exists to support hardware controllers
1789 * incapable of generating DMAs with more than 32 bits
1790 * of address on platforms with physical memory > 4G...
1792 unsigned int vm_lopages_allocated_q
= 0;
1793 unsigned int vm_lopages_allocated_cpm_success
= 0;
1794 unsigned int vm_lopages_allocated_cpm_failed
= 0;
1795 queue_head_t vm_lopage_queue_free
;
1798 vm_page_grablo(void)
1802 if (vm_lopage_needed
== FALSE
)
1803 return (vm_page_grab());
1805 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
1807 if ( !queue_empty(&vm_lopage_queue_free
)) {
1808 queue_remove_first(&vm_lopage_queue_free
,
1812 assert(vm_lopage_free_count
);
1814 vm_lopage_free_count
--;
1815 vm_lopages_allocated_q
++;
1817 if (vm_lopage_free_count
< vm_lopage_lowater
)
1818 vm_lopage_refill
= TRUE
;
1820 lck_mtx_unlock(&vm_page_queue_free_lock
);
1822 lck_mtx_unlock(&vm_page_queue_free_lock
);
1824 if (cpm_allocate(PAGE_SIZE
, &mem
, atop(0xffffffff), 0, FALSE
, KMA_LOMEM
) != KERN_SUCCESS
) {
1826 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
1827 vm_lopages_allocated_cpm_failed
++;
1828 lck_mtx_unlock(&vm_page_queue_free_lock
);
1830 return (VM_PAGE_NULL
);
1834 vm_page_lockspin_queues();
1836 mem
->gobbled
= FALSE
;
1837 vm_page_gobble_count
--;
1838 vm_page_wire_count
--;
1840 vm_lopages_allocated_cpm_success
++;
1841 vm_page_unlock_queues();
1845 assert(!mem
->pmapped
);
1846 assert(!mem
->wpmapped
);
1847 assert(!pmap_is_noencrypt(mem
->phys_page
));
1849 mem
->pageq
.next
= NULL
;
1850 mem
->pageq
.prev
= NULL
;
1859 * first try to grab a page from the per-cpu free list...
1860 * this must be done while pre-emption is disabled... if
1861 * a page is available, we're done...
1862 * if no page is available, grab the vm_page_queue_free_lock
1863 * and see if current number of free pages would allow us
1864 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1865 * if there are pages available, disable preemption and
1866 * recheck the state of the per-cpu free list... we could
1867 * have been preempted and moved to a different cpu, or
1868 * some other thread could have re-filled it... if still
1869 * empty, figure out how many pages we can steal from the
1870 * global free queue and move to the per-cpu queue...
1871 * return 1 of these pages when done... only wakeup the
1872 * pageout_scan thread if we moved pages from the global
1873 * list... no need for the wakeup if we've satisfied the
1874 * request from the per-cpu queue.
1877 #define COLOR_GROUPS_TO_STEAL 4
1881 vm_page_grab( void )
1886 disable_preemption();
1888 if ((mem
= PROCESSOR_DATA(current_processor(), free_pages
))) {
1889 return_page_from_cpu_list
:
1890 PROCESSOR_DATA(current_processor(), page_grab_count
) += 1;
1891 PROCESSOR_DATA(current_processor(), free_pages
) = mem
->pageq
.next
;
1892 mem
->pageq
.next
= NULL
;
1894 enable_preemption();
1896 assert(mem
->listq
.next
== NULL
&& mem
->listq
.prev
== NULL
);
1897 assert(mem
->tabled
== FALSE
);
1898 assert(mem
->object
== VM_OBJECT_NULL
);
1899 assert(!mem
->laundry
);
1901 assert(pmap_verify_free(mem
->phys_page
));
1903 assert(!mem
->encrypted
);
1904 assert(!mem
->pmapped
);
1905 assert(!mem
->wpmapped
);
1906 assert(!mem
->active
);
1907 assert(!mem
->inactive
);
1908 assert(!mem
->throttled
);
1909 assert(!mem
->speculative
);
1910 assert(!pmap_is_noencrypt(mem
->phys_page
));
1914 enable_preemption();
1918 * Optionally produce warnings if the wire or gobble
1919 * counts exceed some threshold.
1921 if (vm_page_wire_count_warning
> 0
1922 && vm_page_wire_count
>= vm_page_wire_count_warning
) {
1923 printf("mk: vm_page_grab(): high wired page count of %d\n",
1924 vm_page_wire_count
);
1925 assert(vm_page_wire_count
< vm_page_wire_count_warning
);
1927 if (vm_page_gobble_count_warning
> 0
1928 && vm_page_gobble_count
>= vm_page_gobble_count_warning
) {
1929 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1930 vm_page_gobble_count
);
1931 assert(vm_page_gobble_count
< vm_page_gobble_count_warning
);
1934 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
1937 * Only let privileged threads (involved in pageout)
1938 * dip into the reserved pool.
1940 if ((vm_page_free_count
< vm_page_free_reserved
) &&
1941 !(current_thread()->options
& TH_OPT_VMPRIV
)) {
1942 lck_mtx_unlock(&vm_page_queue_free_lock
);
1948 unsigned int pages_to_steal
;
1951 while ( vm_page_free_count
== 0 ) {
1953 lck_mtx_unlock(&vm_page_queue_free_lock
);
1955 * must be a privileged thread to be
1956 * in this state since a non-privileged
1957 * thread would have bailed if we were
1958 * under the vm_page_free_reserved mark
1961 lck_mtx_lock_spin(&vm_page_queue_free_lock
);
1964 disable_preemption();
1966 if ((mem
= PROCESSOR_DATA(current_processor(), free_pages
))) {
1967 lck_mtx_unlock(&vm_page_queue_free_lock
);
1970 * we got preempted and moved to another processor
1971 * or we got preempted and someone else ran and filled the cache
1973 goto return_page_from_cpu_list
;
1975 if (vm_page_free_count
<= vm_page_free_reserved
)
1978 pages_to_steal
= COLOR_GROUPS_TO_STEAL
* vm_colors
;
1980 if (pages_to_steal
> (vm_page_free_count
- vm_page_free_reserved
))
1981 pages_to_steal
= (vm_page_free_count
- vm_page_free_reserved
);
1983 color
= PROCESSOR_DATA(current_processor(), start_color
);
1986 while (pages_to_steal
--) {
1987 if (--vm_page_free_count
< vm_page_free_count_minimum
)
1988 vm_page_free_count_minimum
= vm_page_free_count
;
1990 while (queue_empty(&vm_page_queue_free
[color
]))
1991 color
= (color
+ 1) & vm_color_mask
;
1993 queue_remove_first(&vm_page_queue_free
[color
],
1997 mem
->pageq
.next
= NULL
;
1998 mem
->pageq
.prev
= NULL
;
2000 assert(!mem
->active
);
2001 assert(!mem
->inactive
);
2002 assert(!mem
->throttled
);
2003 assert(!mem
->speculative
);
2005 color
= (color
+ 1) & vm_color_mask
;
2010 tail
->pageq
.next
= (queue_t
)mem
;
2013 mem
->pageq
.prev
= NULL
;
2014 assert(mem
->listq
.next
== NULL
&& mem
->listq
.prev
== NULL
);
2015 assert(mem
->tabled
== FALSE
);
2016 assert(mem
->object
== VM_OBJECT_NULL
);
2017 assert(!mem
->laundry
);
2021 assert(pmap_verify_free(mem
->phys_page
));
2024 assert(!mem
->encrypted
);
2025 assert(!mem
->pmapped
);
2026 assert(!mem
->wpmapped
);
2027 assert(!pmap_is_noencrypt(mem
->phys_page
));
2029 PROCESSOR_DATA(current_processor(), free_pages
) = head
->pageq
.next
;
2030 PROCESSOR_DATA(current_processor(), start_color
) = color
;
2033 * satisfy this request
2035 PROCESSOR_DATA(current_processor(), page_grab_count
) += 1;
2037 mem
->pageq
.next
= NULL
;
2039 lck_mtx_unlock(&vm_page_queue_free_lock
);
2041 enable_preemption();
2044 * Decide if we should poke the pageout daemon.
2045 * We do this if the free count is less than the low
2046 * water mark, or if the free count is less than the high
2047 * water mark (but above the low water mark) and the inactive
2048 * count is less than its target.
2050 * We don't have the counts locked ... if they change a little,
2051 * it doesn't really matter.
2053 if ((vm_page_free_count
< vm_page_free_min
) ||
2054 ((vm_page_free_count
< vm_page_free_target
) &&
2055 ((vm_page_inactive_count
+ vm_page_speculative_count
) < vm_page_inactive_min
)))
2056 thread_wakeup((event_t
) &vm_page_free_wanted
);
2058 VM_CHECK_MEMORYSTATUS
;
2060 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
/*
 *	vm_page_release:
 *
 *	Return a page to the free list.
 */
void
vm_page_release(
	register vm_page_t	mem)
{
	unsigned int	color;
	int		need_wakeup = 0;
	int		need_priv_wakeup = 0;

	assert(!mem->private && !mem->fictitious);
	if (vm_page_free_verify) {
		assert(pmap_verify_free(mem->phys_page));
	}
//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */

	pmap_clear_noencrypt(mem->phys_page);

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (mem->free)
		panic("vm_page_release");

	assert(!mem->laundry);
	assert(mem->object == VM_OBJECT_NULL);
	assert(mem->pageq.next == NULL &&
	       mem->pageq.prev == NULL);
	assert(mem->listq.next == NULL &&
	       mem->listq.prev == NULL);

	if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
	    vm_lopage_free_count < vm_lopage_free_limit &&
	    mem->phys_page < max_valid_low_ppnum) {
		/*
		 * this exists to support hardware controllers
		 * incapable of generating DMAs with more than 32 bits
		 * of address on platforms with physical memory > 4G...
		 */
		queue_enter_first(&vm_lopage_queue_free,
				  mem,
				  vm_page_t,
				  pageq);
		vm_lopage_free_count++;

		if (vm_lopage_free_count >= vm_lopage_free_limit)
			vm_lopage_refill = FALSE;

		mem->lopage = TRUE;
	} else {
		mem->lopage = FALSE;
		mem->free = TRUE;

		color = mem->phys_page & vm_color_mask;
		queue_enter_first(&vm_page_queue_free[color],
				  mem,
				  vm_page_t,
				  pageq);
		vm_page_free_count++;
		/*
		 *	Check if we should wake up someone waiting for page.
		 *	But don't bother waking them unless they can allocate.
		 *
		 *	We wakeup only one thread, to prevent starvation.
		 *	Because the scheduling system handles wait queues FIFO,
		 *	if we wakeup all waiting threads, one greedy thread
		 *	can starve multiple niceguy threads.  When the threads
		 *	all wakeup, the greedy threads runs first, grabs the page,
		 *	and waits for another page.  It will be the first to run
		 *	when the next page is freed.
		 *
		 *	However, there is a slight danger here.
		 *	The thread we wake might not use the free page.
		 *	Then the other threads could wait indefinitely
		 *	while the page goes unused.  To forestall this,
		 *	the pageout daemon will keep making free pages
		 *	as long as vm_page_free_wanted is non-zero.
		 */
		assert(vm_page_free_count > 0);
		if (vm_page_free_wanted_privileged > 0) {
			vm_page_free_wanted_privileged--;
			need_priv_wakeup = 1;
		} else if (vm_page_free_wanted > 0 &&
			   vm_page_free_count > vm_page_free_reserved) {
			vm_page_free_wanted--;
			need_wakeup = 1;
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);

	if (need_priv_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
	else if (need_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_count);

	VM_CHECK_MEMORYSTATUS;
}
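
/*
 * Illustrative sketch (not part of the build): how a physical page number
 * selects a free-queue color bucket.  Assumes the color count is a power of
 * two so that "mask = colors - 1" is valid; "sk_" names are placeholders.
 */
static unsigned int
sk_free_queue_color(unsigned int phys_page, unsigned int vm_colors_pow2)
{
	unsigned int mask = vm_colors_pow2 - 1;	/* e.g. 32 colors -> mask 0x1f */

	return phys_page & mask;		/* pages that collide in the cache share a bucket */
}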
/*
 *	vm_page_wait:
 *
 *	Wait for a page to become available.
 *	If there are plenty of free pages, then we don't sleep.
 *
 *	Returns:
 *		TRUE:  There may be another page, try again
 *		FALSE: We were interrupted out of our wait, don't try again
 */
boolean_t
vm_page_wait(
	int	interruptible)
{
	/*
	 *	We can't use vm_page_free_reserved to make this
	 *	determination.  Consider: some thread might
	 *	need to allocate two pages.  The first allocation
	 *	succeeds, the second fails.  After the first page is freed,
	 *	a call to vm_page_wait must really block.
	 */
	kern_return_t	wait_result;
	int		need_wakeup = 0;
	int		is_privileged = current_thread()->options & TH_OPT_VMPRIV;

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (is_privileged && vm_page_free_count) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}
	if (vm_page_free_count < vm_page_free_target) {

		if (is_privileged) {
			if (vm_page_free_wanted_privileged++ == 0)
				need_wakeup = 1;
			wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
		} else {
			if (vm_page_free_wanted++ == 0)
				need_wakeup = 1;
			wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);
		counter(c_vm_page_wait_block++);

		if (need_wakeup)
			thread_wakeup((event_t)&vm_page_free_wanted);

		if (wait_result == THREAD_WAITING) {
			VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
				       vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
			wait_result = thread_block(THREAD_CONTINUE_NULL);
			VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
		}
		return (wait_result == THREAD_AWAKENED);
	} else {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}
}
/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return (mem);
}

vm_page_t
vm_page_alloclo(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grablo();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return (mem);
}


/*
 *	vm_page_alloc_guard:
 *
 *	Allocate a fictitious page which will be used
 *	as a guard page.  The page will be inserted into
 *	the object and returned to the caller.
 */
vm_page_t
vm_page_alloc_guard(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab_guard();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return (mem);
}
counter(unsigned int c_laundry_pages_freed = 0;)

/*
 *	vm_page_free_prepare:
 *
 *	Removes page from any queue it may be on
 *	and disassociates it from its VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
static void
vm_page_free_prepare(
	vm_page_t	mem)
{
	vm_page_free_prepare_queues(mem);
	vm_page_free_prepare_object(mem, TRUE);
}


void
vm_page_free_prepare_queues(
	vm_page_t	mem)
{
	assert(!mem->cleaning);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (mem->free)
		panic("vm_page_free: freeing page on free list\n");

	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	}
	if (mem->laundry) {
		/*
		 * We may have to free a page while it's being laundered
		 * if we lost its pager (due to a forced unmount, for example).
		 * We need to call vm_pageout_steal_laundry() before removing
		 * the page from its VM object, so that we can remove it
		 * from its pageout queue and adjust the laundry accounting
		 */
		vm_pageout_steal_laundry(mem, TRUE);
		counter(++c_laundry_pages_freed);
	}

	VM_PAGE_QUEUES_REMOVE(mem);	/* clears local/active/inactive/throttled/speculative */

	if (VM_PAGE_WIRED(mem)) {
		if (mem->object) {
			assert(mem->object->wired_page_count > 0);
			mem->object->wired_page_count--;
			assert(mem->object->resident_page_count >=
			       mem->object->wired_page_count);

			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
				OSAddAtomic(+1, &vm_page_purgeable_count);
				assert(vm_page_purgeable_wired_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_wired_count);
			}
		}
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		mem->wire_count = 0;
		assert(!mem->gobbled);
	} else if (mem->gobbled) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
	}
}


void
vm_page_free_prepare_object(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	if (mem->tabled)
		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */

	PAGE_WAKEUP(mem);		/* clears wanted */

	if (mem->private) {
		mem->private = FALSE;
		mem->fictitious = TRUE;
		mem->phys_page = vm_page_fictitious_addr;
	}
	if ( !mem->fictitious) {
		vm_page_init(mem, mem->phys_page, mem->lopage);
	}
}
/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it with any VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
void
vm_page_free(
	vm_page_t	mem)
{
	vm_page_free_prepare(mem);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}


void
vm_page_free_unlocked(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_lockspin_queues();
	vm_page_free_prepare_queues(mem);
	vm_page_unlock_queues();

	vm_page_free_prepare_object(mem, remove_from_hash);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}
/*
 * Free a list of pages.  The list can be up to several hundred pages,
 * as blocked up by vm_pageout_scan().
 * The big win is not having to take the free list lock once
 * per page.
 */
void
vm_page_free_list(
	vm_page_t	freeq,
	boolean_t	prepare_object)
{
	vm_page_t	mem;
	vm_page_t	nxt;
	vm_page_t	local_freeq;
	int		pg_count;

	while (freeq) {

		pg_count = 0;
		local_freeq = VM_PAGE_NULL;
		mem = freeq;

		/*
		 * break up the processing into smaller chunks so
		 * that we can 'pipeline' the pages onto the
		 * free list w/o introducing too much
		 * contention on the global free queue lock
		 */
		while (mem && pg_count < 64) {

			assert(!mem->inactive);
			assert(!mem->active);
			assert(!mem->throttled);
			assert(!mem->speculative);
			assert(!VM_PAGE_WIRED(mem));
			assert(mem->pageq.prev == NULL);

			nxt = (vm_page_t)(mem->pageq.next);

			if (vm_page_free_verify && !mem->fictitious && !mem->private) {
				assert(pmap_verify_free(mem->phys_page));
			}
			if (prepare_object == TRUE)
				vm_page_free_prepare_object(mem, TRUE);

			if (!mem->fictitious) {

				if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
				    vm_lopage_free_count < vm_lopage_free_limit &&
				    mem->phys_page < max_valid_low_ppnum) {
					mem->pageq.next = NULL;
					vm_page_release(mem);
				} else {
					/*
					 * IMPORTANT: we can't set the page "free" here
					 * because that would make the page eligible for
					 * a physically-contiguous allocation (see
					 * vm_page_find_contiguous()) right away (we don't
					 * hold the vm_page_queue_free lock).  That would
					 * cause trouble because the page is not actually
					 * in the free queue yet...
					 */
					mem->pageq.next = (queue_entry_t)local_freeq;
					local_freeq = mem;
					pg_count++;

					pmap_clear_noencrypt(mem->phys_page);
				}
			} else {
				assert(mem->phys_page == vm_page_fictitious_addr ||
				       mem->phys_page == vm_page_guard_addr);
				vm_page_release_fictitious(mem);
			}
			mem = nxt;
		}
		freeq = mem;

		if ( (mem = local_freeq) ) {
			unsigned int	avail_free_count;
			unsigned int	need_wakeup = 0;
			unsigned int	need_priv_wakeup = 0;

			lck_mtx_lock_spin(&vm_page_queue_free_lock);

			while (mem) {
				int	color;

				nxt = (vm_page_t)(mem->pageq.next);

				mem->free = TRUE;

				color = mem->phys_page & vm_color_mask;
				queue_enter_first(&vm_page_queue_free[color],
						  mem,
						  vm_page_t,
						  pageq);
				mem = nxt;
			}
			vm_page_free_count += pg_count;
			avail_free_count = vm_page_free_count;

			if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {

				if (avail_free_count < vm_page_free_wanted_privileged) {
					need_priv_wakeup = avail_free_count;
					vm_page_free_wanted_privileged -= avail_free_count;
					avail_free_count = 0;
				} else {
					need_priv_wakeup = vm_page_free_wanted_privileged;
					vm_page_free_wanted_privileged = 0;
					avail_free_count -= need_priv_wakeup;
				}
			}
			if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
				unsigned int  available_pages;

				available_pages = avail_free_count - vm_page_free_reserved;

				if (available_pages >= vm_page_free_wanted) {
					need_wakeup = vm_page_free_wanted;
					vm_page_free_wanted = 0;
				} else {
					need_wakeup = available_pages;
					vm_page_free_wanted -= available_pages;
				}
			}
			lck_mtx_unlock(&vm_page_queue_free_lock);

			if (need_priv_wakeup != 0) {
				/*
				 * There shouldn't be that many VM-privileged threads,
				 * so let's wake them all up, even if we don't quite
				 * have enough pages to satisfy them all.
				 */
				thread_wakeup((event_t)&vm_page_free_wanted_privileged);
			}
			if (need_wakeup != 0 && vm_page_free_wanted == 0) {
				/*
				 * We don't expect to have any more waiters
				 * after this, so let's wake them all up at
				 * once.
				 */
				thread_wakeup((event_t) &vm_page_free_count);
			} else for (; need_wakeup != 0; need_wakeup--) {
				/*
				 * Wake up one waiter per page we just released.
				 */
				thread_wakeup_one((event_t) &vm_page_free_count);
			}

			VM_CHECK_MEMORYSTATUS;
		}
	}
}
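
/*
 * Illustrative sketch (not part of the build): the wakeup bookkeeping above,
 * restated over plain integers.  Given a batch of newly freed pages, satisfy
 * VM-privileged waiters first, then ordinary waiters, without letting the
 * ordinary waiters dip into the reserved pool.  "sk_" names are placeholders.
 */
struct sk_wakeups {
	unsigned int	priv_wakeups;	/* privileged waiters to wake */
	unsigned int	norm_wakeups;	/* ordinary waiters to wake */
};

static struct sk_wakeups
sk_distribute_free_pages(unsigned int avail, unsigned int priv_wanted,
			 unsigned int wanted, unsigned int reserved)
{
	struct sk_wakeups w = { 0, 0 };

	if (priv_wanted > 0 && avail > 0) {
		w.priv_wakeups = (avail < priv_wanted) ? avail : priv_wanted;
		avail -= w.priv_wakeups;
	}
	if (wanted > 0 && avail > reserved) {
		unsigned int usable = avail - reserved;

		w.norm_wakeups = (usable >= wanted) ? wanted : usable;
	}
	return w;
}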
/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_wire(
	register vm_page_t	mem)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 1);	/* (TEST/DEBUG) */

	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	} else {
		/*
		 * In theory, the page should be in an object before it
		 * gets wired, since we need to hold the object lock
		 * to update some fields in the page structure.
		 * However, some code (i386 pmap, for example) might want
		 * to wire a page before it gets inserted into an object.
		 * That's somewhat OK, as long as nobody else can get to
		 * that page and update it at the same time.
		 */
	}
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if ( !VM_PAGE_WIRED(mem)) {

		if (mem->pageout_queue) {
			mem->pageout = FALSE;
			vm_pageout_throttle_up(mem);
		}
		VM_PAGE_QUEUES_REMOVE(mem);

		if (mem->object) {
			mem->object->wired_page_count++;
			assert(mem->object->resident_page_count >=
			       mem->object->wired_page_count);
			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
				assert(vm_page_purgeable_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_count);
				OSAddAtomic(1, &vm_page_purgeable_wired_count);
			}
			if (mem->object->all_reusable) {
				/*
				 * Wired pages are not counted as "re-usable"
				 * in "all_reusable" VM objects, so nothing
				 * to do here.
				 */
			} else if (mem->reusable) {
				/*
				 * This page is not "re-usable" when it's
				 * wired, so adjust its state and the
				 * accounting.
				 */
				vm_object_reuse_pages(mem->object,
						      mem->offset,
						      mem->offset+PAGE_SIZE_64,
						      FALSE);
			}
		}
		assert(!mem->reusable);

		if (!mem->private && !mem->fictitious && !mem->gobbled)
			vm_page_wire_count++;
		if (mem->gobbled)
			vm_page_gobble_count--;
		mem->gobbled = FALSE;

		VM_CHECK_MEMORYSTATUS;

		/*
		 * ENCRYPTED SWAP:
		 * The page could be encrypted, but
		 * We don't have to decrypt it here
		 * because we don't guarantee that the
		 * data is actually valid at this point.
		 * The page will get decrypted in
		 * vm_fault_wire() if needed.
		 */
	}
	assert(!mem->gobbled);
	mem->wire_count++;
}
/*
 *	vm_page_gobble:
 *
 *	Mark this page as consumed by the vm/ipc/xmm subsystems.
 *
 *	Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
 */
void
vm_page_gobble(
	register vm_page_t	mem)
{
	vm_page_lockspin_queues();

	assert(!mem->gobbled);
	assert( !VM_PAGE_WIRED(mem));

	if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count++;
	}
	vm_page_gobble_count++;
	mem->gobbled = TRUE;
	vm_page_unlock_queues();
}
/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_unwire(
	vm_page_t	mem,
	boolean_t	queueit)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 0);	/* (TEST/DEBUG) */

	assert(VM_PAGE_WIRED(mem));
	assert(mem->object != VM_OBJECT_NULL);

	vm_object_lock_assert_exclusive(mem->object);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if (--mem->wire_count == 0) {
		assert(!mem->private && !mem->fictitious);
		vm_page_wire_count--;
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
		assert(mem->object->resident_page_count >=
		       mem->object->wired_page_count);
		if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
			OSAddAtomic(+1, &vm_page_purgeable_count);
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		}
		assert(!mem->laundry);
		assert(mem->object != kernel_object);
		assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);

		if (queueit == TRUE) {
			if (mem->object->purgable == VM_PURGABLE_EMPTY) {
				vm_page_deactivate(mem);
			} else {
				vm_page_activate(mem);
			}
		}

		VM_CHECK_MEMORYSTATUS;
	}
}
/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(
	vm_page_t	m)
{
	vm_page_deactivate_internal(m, TRUE);
}


void
vm_page_deactivate_internal(
	vm_page_t	m,
	boolean_t	clear_hw_reference)
{
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

//	dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);	/* (TEST/DEBUG) */

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 *	This page is no longer very interesting.  If it was
	 *	interesting (active or inactive/referenced), then we
	 *	clear the reference bit and (re)enter it in the
	 *	inactive queue.  Note wired pages should not have
	 *	their reference bit cleared.
	 */
	assert ( !(m->absent && !m->unusual));

	if (m->gobbled) {		/* can this happen? */
		assert( !VM_PAGE_WIRED(m));

		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
		return;

	if (!m->absent && clear_hw_reference == TRUE)
		pmap_clear_reference(m->phys_page);

	m->reference = FALSE;
	m->no_cache = FALSE;

	if (!m->inactive) {
		VM_PAGE_QUEUES_REMOVE(m);

		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
		    m->dirty && m->object->internal &&
		    (m->object->purgable == VM_PURGABLE_DENY ||
		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
			m->throttled = TRUE;
			vm_page_throttled_count++;
		} else {
			if (m->object->named && m->object->ref_count == 1) {
				vm_page_speculate(m, FALSE);
#if DEVELOPMENT || DEBUG
				vm_page_speculative_recreated++;
#endif
			} else {
				VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
			}
		}
	}
}
/*
 * vm_page_enqueue_cleaned
 *
 * Put the page on the cleaned queue, mark it cleaned, etc.
 * Being on the cleaned queue (and having m->clean_queue set)
 * does ** NOT ** guarantee that the page is clean!
 *
 * Call with the queues lock held.
 */
void vm_page_enqueue_cleaned(vm_page_t m)
{
	assert(m->phys_page != vm_page_guard_addr);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {
		assert( !VM_PAGE_WIRED(m));
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
		return;

	VM_PAGE_QUEUES_REMOVE(m);

	queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
	m->clean_queue = TRUE;
	vm_page_cleaned_count++;

	m->inactive = TRUE;
	vm_page_inactive_count++;
	if (m->object->internal) {
		vm_page_pageable_internal_count++;
	} else {
		vm_page_pageable_external_count++;
	}

	vm_pageout_enqueued_cleaned++;
}
/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */

#if CONFIG_JETSAM
#if LATENCY_JETSAM
extern struct vm_page	jetsam_latency_page[NUM_OF_JETSAM_LATENCY_TOKENS];
#endif /* LATENCY_JETSAM */
#endif /* CONFIG_JETSAM */

void
vm_page_activate(
	register vm_page_t	m)
{
#ifdef	FIXME_4778297
	assert(m->object != kernel_object);
#endif
	assert(m->phys_page != vm_page_guard_addr);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {
		assert( !VM_PAGE_WIRED(m));
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->pageout_queue || m->private || m->fictitious || m->compressor)
		return;

	if (m->active)
		panic("vm_page_activate: already active");

	if (m->speculative) {
		DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
		DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
	}

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {

		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
		    m->dirty && m->object->internal &&
		    (m->object->purgable == VM_PURGABLE_DENY ||
		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
			m->throttled = TRUE;
			vm_page_throttled_count++;
		} else {
			queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
			m->active = TRUE;
			vm_page_active_count++;
			if (m->object->internal) {
				vm_page_pageable_internal_count++;
			} else {
				vm_page_pageable_external_count++;
			}
#if LATENCY_JETSAM
			{
				uint64_t	now = mach_absolute_time();
				uint64_t	delta = now - jlp_time;
				clock_sec_t	jl_secs = 0;
				clock_usec_t	jl_usecs = 0;
				vm_page_t	jlp;

				absolutetime_to_microtime(delta, &jl_secs, &jl_usecs);

				jl_usecs += jl_secs * USEC_PER_SEC;
				if (jl_usecs >= JETSAM_LATENCY_TOKEN_AGE) {

					jlp = &jetsam_latency_page[jlp_current];
					if (jlp->active) {
						queue_remove(&vm_page_queue_active, jlp, vm_page_t, pageq);
					}
					queue_enter(&vm_page_queue_active, jlp, vm_page_t, pageq);

					jlp->active = TRUE;
					jlp->offset = now;
					jlp_time = jlp->offset;

					if(++jlp_current == NUM_OF_JETSAM_LATENCY_TOKENS) {
						jlp_current = 0;
					}
				}
			}
#endif /* LATENCY_JETSAM */
		}
		m->reference = TRUE;
		m->no_cache = FALSE;
	}
}
/*
 *      vm_page_speculate:
 *
 *      Put the specified page on the speculative list (if appropriate).
 *
 *      The page queues must be locked.
 */
void
vm_page_speculate(
	vm_page_t	m,
	boolean_t	new)
{
	struct vm_speculative_age_q	*aq;

	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	assert( !(m->absent && !m->unusual));

	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->pageout_queue || m->private || m->fictitious || m->compressor)
		return;

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {
		mach_timespec_t	ts;
		clock_sec_t	sec;
		clock_nsec_t	nsec;

		clock_get_system_nanotime(&sec, &nsec);
		ts.tv_sec = (unsigned int) sec;
		ts.tv_nsec = nsec;

		if (vm_page_speculative_count == 0) {

			speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

			aq = &vm_page_queue_speculative[speculative_age_index];

			/*
			 * set the timer to begin a new group
			 */
			aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
			aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

			ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
		} else {
			aq = &vm_page_queue_speculative[speculative_age_index];

			if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {

				speculative_age_index++;

				if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
					speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				if (speculative_age_index == speculative_steal_index) {
					speculative_steal_index = speculative_age_index + 1;

					if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
						speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				}
				aq = &vm_page_queue_speculative[speculative_age_index];

				if (!queue_empty(&aq->age_q))
					vm_page_speculate_ageit(aq);

				aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
				aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

				ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
			}
		}
		enqueue_tail(&aq->age_q, &m->pageq);
		m->speculative = TRUE;
		vm_page_speculative_count++;
		if (m->object->internal) {
			vm_page_pageable_internal_count++;
		} else {
			vm_page_pageable_external_count++;
		}

		if (new == TRUE) {
			vm_object_lock_assert_exclusive(m->object);

			m->object->pages_created++;
#if DEVELOPMENT || DEBUG
			vm_page_speculative_created++;
#endif
		}
	}
}
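
/*
 * Illustrative sketch (not part of the build): the millisecond-to-timespec
 * split used when arming a speculative age queue above, assuming the usual
 * definition of one microsecond as 1000 nanoseconds.  "sk_" names are
 * placeholders.
 */
struct sk_timespec {
	unsigned int	tv_sec;
	long		tv_nsec;
};

static struct sk_timespec
sk_age_ms_to_timespec(unsigned int age_ms)
{
	struct sk_timespec ts;

	ts.tv_sec  = age_ms / 1000;				/* whole seconds */
	ts.tv_nsec = (long)(age_ms % 1000) * 1000 * 1000;	/* remaining milliseconds as nanoseconds */
	return ts;
}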
/*
 * move pages from the specified aging bin to
 * the speculative bin that pageout_scan claims from
 *
 *      The page queues must be locked.
 */
void
vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
{
	struct vm_speculative_age_q	*sq;
	vm_page_t			t;

	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

	if (queue_empty(&sq->age_q)) {
		sq->age_q.next = aq->age_q.next;
		sq->age_q.prev = aq->age_q.prev;

		t = (vm_page_t)sq->age_q.next;
		t->pageq.prev = &sq->age_q;

		t = (vm_page_t)sq->age_q.prev;
		t->pageq.next = &sq->age_q;
	} else {
		t = (vm_page_t)sq->age_q.prev;
		t->pageq.next = aq->age_q.next;

		t = (vm_page_t)aq->age_q.next;
		t->pageq.prev = sq->age_q.prev;

		t = (vm_page_t)aq->age_q.prev;
		t->pageq.next = &sq->age_q;

		sq->age_q.prev = aq->age_q.prev;
	}
	queue_init(&aq->age_q);
}
void
vm_page_lru(
	vm_page_t	m)
{
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
		return;

	m->no_cache = FALSE;

	VM_PAGE_QUEUES_REMOVE(m);

	VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
}
void
vm_page_reactivate_all_throttled(void)
{
	vm_page_t	first_throttled, last_throttled;
	vm_page_t	first_active;
	vm_page_t	m;
	int		extra_active_count;
	int		extra_internal_count, extra_external_count;

	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
		return;

	extra_active_count = 0;
	extra_internal_count = 0;
	extra_external_count = 0;
	vm_page_lock_queues();
	if (! queue_empty(&vm_page_queue_throttled)) {
		/*
		 * Switch "throttled" pages to "active".
		 */
		queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
			assert(m->throttled);
			assert(!m->active);
			assert(!m->inactive);
			assert(!m->speculative);
			assert(!VM_PAGE_WIRED(m));

			extra_active_count++;
			if (m->object->internal) {
				extra_internal_count++;
			} else {
				extra_external_count++;
			}

			m->throttled = FALSE;
			m->active = TRUE;
		}

		/*
		 * Transfer the entire throttled queue to a regular LRU page queues.
		 * We insert it at the head of the active queue, so that these pages
		 * get re-evaluated by the LRU algorithm first, since they've been
		 * completely out of it until now.
		 */
		first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
		last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
		first_active = (vm_page_t) queue_first(&vm_page_queue_active);
		if (queue_empty(&vm_page_queue_active)) {
			queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
		} else {
			queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
		}
		queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
		queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
		queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;

#if DEBUG
		printf("reactivated %d throttled pages\n", vm_page_throttled_count);
#endif
		queue_init(&vm_page_queue_throttled);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += extra_active_count;
		vm_page_pageable_internal_count += extra_internal_count;
		vm_page_pageable_external_count += extra_external_count;
		vm_page_throttled_count = 0;
	}
	assert(vm_page_throttled_count == 0);
	assert(queue_empty(&vm_page_queue_throttled));
	vm_page_unlock_queues();
}
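
/*
 * Illustrative sketch (not part of the build): splicing one circular,
 * doubly-linked queue onto the head of another, the same pointer surgery
 * performed above with queue_first()/queue_last().  A minimal local node
 * type is used; "sk_" names are placeholders.
 */
struct sk_qnode {
	struct sk_qnode	*next;
	struct sk_qnode	*prev;
};

static void
sk_queue_splice_head(struct sk_qnode *dst_head, struct sk_qnode *src_head)
{
	struct sk_qnode *first = src_head->next;
	struct sk_qnode *last  = src_head->prev;

	if (first == src_head)			/* source queue is empty */
		return;

	last->next = dst_head->next;		/* old first element of dst follows the spliced run */
	dst_head->next->prev = last;
	dst_head->next = first;			/* spliced run becomes the new head run */
	first->prev = dst_head;

	src_head->next = src_head;		/* leave the source queue empty */
	src_head->prev = src_head;
}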
/*
 * move pages from the indicated local queue to the global active queue
 * its ok to fail if we're below the hard limit and force == FALSE
 * the nolocks == TRUE case is to allow this function to be run on
 * the hibernate path
 */
void
vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
{
	struct vpl	*lq;
	vm_page_t	first_local, last_local;
	vm_page_t	first_active;
	vm_page_t	m;
	uint32_t	count = 0;

	if (vm_page_local_q == NULL)
		return;

	lq = &vm_page_local_q[lid].vpl_un.vpl;

	if (nolocks == FALSE) {
		if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
			if ( !vm_page_trylockspin_queues())
				return;
		} else
			vm_page_lockspin_queues();

		VPL_LOCK(&lq->vpl_lock);
	}
	if (lq->vpl_count) {
		/*
		 * Switch "local" pages to "active".
		 */
		assert(!queue_empty(&lq->vpl_queue));

		queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
			assert(!m->inactive);
			assert(!m->speculative);
			assert(!VM_PAGE_WIRED(m));
			assert(!m->throttled);
			assert(!m->fictitious);

			if (m->local_id != lid)
				panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);

			m->local_id = 0;
			m->local = FALSE;
			m->active = TRUE;

			count++;
		}
		if (count != lq->vpl_count)
			panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);

		/*
		 * Transfer the entire local queue to a regular LRU page queues.
		 */
		first_local = (vm_page_t) queue_first(&lq->vpl_queue);
		last_local = (vm_page_t) queue_last(&lq->vpl_queue);
		first_active = (vm_page_t) queue_first(&vm_page_queue_active);

		if (queue_empty(&vm_page_queue_active)) {
			queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
		} else {
			queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
		}
		queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
		queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
		queue_next(&last_local->pageq) = (queue_entry_t) first_active;

		queue_init(&lq->vpl_queue);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += lq->vpl_count;
		vm_page_pageable_internal_count += lq->vpl_internal_count;
		vm_page_pageable_external_count += lq->vpl_external_count;
		lq->vpl_count = 0;
		lq->vpl_internal_count = 0;
		lq->vpl_external_count = 0;
	}
	assert(queue_empty(&lq->vpl_queue));

	if (nolocks == FALSE) {
		VPL_UNLOCK(&lq->vpl_lock);
		vm_page_unlock_queues();
	}
}
/*
 * vm_page_part_zero_fill:
 *
 *	Zero-fill a part of the page.
 */
#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
void
vm_page_part_zero_fill(
	vm_page_t	m,
	vm_offset_t	m_pa,
	vm_size_t	len)
{
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(m);
#endif

#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
	pmap_zero_part_page(m->phys_page, m_pa, len);
#else
	vm_page_t	tmp;

	while (1) {
		tmp = vm_page_grab();
		if (tmp == VM_PAGE_NULL) {
			vm_page_wait(THREAD_UNINT);
			continue;
		}
		break;
	}
	vm_page_zero_fill(tmp);
	if (m_pa != 0) {
		vm_page_part_copy(m, 0, tmp, 0, m_pa);
	}
	if ((m_pa + len) < PAGE_SIZE) {
		vm_page_part_copy(m, m_pa + len, tmp,
				  m_pa + len, PAGE_SIZE - (m_pa + len));
	}
	vm_page_copy(tmp, m);
	VM_PAGE_FREE(tmp);
#endif
}
/*
 *	vm_page_zero_fill:
 *
 *	Zero-fill the specified page.
 */
void
vm_page_zero_fill(
	vm_page_t	m)
{
	XPR(XPR_VM_PAGE,
	    "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
	    m->object, m->offset, m, 0, 0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(m);
#endif

//	dbgTrace(0xAEAEAEAE, m->phys_page, 0);		/* (BRINGUP) */
	pmap_zero_page(m->phys_page);
}
/*
 *	vm_page_part_copy:
 *
 *	copy part of one page to another
 */
void
vm_page_part_copy(
	vm_page_t	src_m,
	vm_offset_t	src_pa,
	vm_page_t	dst_m,
	vm_offset_t	dst_pa,
	vm_size_t	len)
{
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dst_m);
#endif
	pmap_copy_part_page(src_m->phys_page, src_pa,
			    dst_m->phys_page, dst_pa, len);
}
/*
 *	vm_page_copy:
 *
 *	Copy one page to another
 *
 * ENCRYPTED SWAP:
 * The source page should not be encrypted.  The caller should
 * make sure the page is decrypted first, if necessary.
 */

int vm_page_copy_cs_validations = 0;
int vm_page_copy_cs_tainted = 0;

void
vm_page_copy(
	vm_page_t	src_m,
	vm_page_t	dest_m)
{
	XPR(XPR_VM_PAGE,
	    "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
	    src_m->object, src_m->offset,
	    dest_m->object, dest_m->offset,
	    0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dest_m);
#endif
	vm_object_lock_assert_held(src_m->object);

	/*
	 * ENCRYPTED SWAP:
	 * The source page should not be encrypted at this point.
	 * The destination page will therefore not contain encrypted
	 * data after the copy.
	 */
	if (src_m->encrypted) {
		panic("vm_page_copy: source page %p is encrypted\n", src_m);
	}
	dest_m->encrypted = FALSE;

	if (src_m->object != VM_OBJECT_NULL &&
	    src_m->object->code_signed) {
		/*
		 * We're copying a page from a code-signed object.
		 * Whoever ends up mapping the copy page might care about
		 * the original page's integrity, so let's validate the
		 * source page now.
		 */
		vm_page_copy_cs_validations++;
		vm_page_validate_cs(src_m);
	}

	if (vm_page_is_slideable(src_m)) {
		boolean_t was_busy = src_m->busy;

		if (!was_busy) {
			src_m->busy = TRUE;
		}
		(void) vm_page_slide(src_m, 0);
		assert(src_m->busy);
		if (!was_busy) {
			PAGE_WAKEUP_DONE(src_m);
		}
	}

	/*
	 * Propagate the cs_tainted bit to the copy page. Do not propagate
	 * the cs_validated bit.
	 */
	dest_m->cs_tainted = src_m->cs_tainted;
	if (dest_m->cs_tainted) {
		vm_page_copy_cs_tainted++;
	}
	dest_m->slid = src_m->slid;
	dest_m->error = src_m->error; /* sliding src_m might have failed... */
	pmap_copy_page(src_m->phys_page, dest_m->phys_page);
}
3554 printf("vm_page %p: \n", p
);
3555 printf(" pageq: next=%p prev=%p\n", p
->pageq
.next
, p
->pageq
.prev
);
3556 printf(" listq: next=%p prev=%p\n", p
->listq
.next
, p
->listq
.prev
);
3557 printf(" next=%p\n", p
->next
);
3558 printf(" object=%p offset=0x%llx\n", p
->object
, p
->offset
);
3559 printf(" wire_count=%u\n", p
->wire_count
);
3561 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3562 (p
->local
? "" : "!"),
3563 (p
->inactive
? "" : "!"),
3564 (p
->active
? "" : "!"),
3565 (p
->pageout_queue
? "" : "!"),
3566 (p
->speculative
? "" : "!"),
3567 (p
->laundry
? "" : "!"));
3568 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3569 (p
->free
? "" : "!"),
3570 (p
->reference
? "" : "!"),
3571 (p
->gobbled
? "" : "!"),
3572 (p
->private ? "" : "!"),
3573 (p
->throttled
? "" : "!"));
3574 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3575 (p
->busy
? "" : "!"),
3576 (p
->wanted
? "" : "!"),
3577 (p
->tabled
? "" : "!"),
3578 (p
->fictitious
? "" : "!"),
3579 (p
->pmapped
? "" : "!"),
3580 (p
->wpmapped
? "" : "!"));
3581 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3582 (p
->pageout
? "" : "!"),
3583 (p
->absent
? "" : "!"),
3584 (p
->error
? "" : "!"),
3585 (p
->dirty
? "" : "!"),
3586 (p
->cleaning
? "" : "!"),
3587 (p
->precious
? "" : "!"),
3588 (p
->clustered
? "" : "!"));
3589 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3590 (p
->overwriting
? "" : "!"),
3591 (p
->restart
? "" : "!"),
3592 (p
->unusual
? "" : "!"),
3593 (p
->encrypted
? "" : "!"),
3594 (p
->encrypted_cleaning
? "" : "!"));
3595 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
3596 (p
->cs_validated
? "" : "!"),
3597 (p
->cs_tainted
? "" : "!"),
3598 (p
->no_cache
? "" : "!"));
3600 printf("phys_page=0x%x\n", p
->phys_page
);
/*
 *	Check that the list of pages is ordered by
 *	ascending physical address and has no holes.
 */
static int
vm_page_verify_contiguous(
	vm_page_t	pages,
	unsigned int	npages)
{
	register vm_page_t	m;
	unsigned int		page_count;
	vm_offset_t		prev_addr;

	prev_addr = pages->phys_page;
	page_count = 1;
	for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
		if (m->phys_page != prev_addr + 1) {
			printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
			       m, (long)prev_addr, m->phys_page);
			printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
			panic("vm_page_verify_contiguous:  not contiguous!");
		}
		prev_addr = m->phys_page;
		++page_count;
	}
	if (page_count != npages) {
		printf("pages %p actual count 0x%x but requested 0x%x\n",
		       pages, page_count, npages);
		panic("vm_page_verify_contiguous:  count error");
	}
	return 1;
}
/*
 *	Check the free lists for proper length etc.
 */
static unsigned int
vm_page_verify_free_list(
	queue_head_t	*vm_page_queue,
	unsigned int	color,
	vm_page_t	look_for_page,
	boolean_t	expect_page)
{
	unsigned int	npages;
	vm_page_t	m;
	vm_page_t	prev_m;
	boolean_t	found_page;

	found_page = FALSE;
	npages = 0;
	prev_m = (vm_page_t) vm_page_queue;
	queue_iterate(vm_page_queue,
		      m,
		      vm_page_t,
		      pageq) {

		if (m == look_for_page) {
			found_page = TRUE;
		}
		if ((vm_page_t) m->pageq.prev != prev_m)
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
			      color, npages, m, m->pageq.prev, prev_m);
		if ( !m->busy)
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
			      color, npages, m);
		if (color != (unsigned int) -1) {
			if ((m->phys_page & vm_color_mask) != color)
				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
				      color, npages, m, m->phys_page & vm_color_mask, color);
			if ( !m->free)
				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
				      color, npages, m);
		}
		++npages;
		prev_m = m;
	}
	if (look_for_page != VM_PAGE_NULL) {
		unsigned int other_color;

		if (expect_page && !found_page) {
			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
			       color, npages, look_for_page, look_for_page->phys_page);
			_vm_page_print(look_for_page);
			for (other_color = 0;
			     other_color < vm_colors;
			     other_color++) {
				if (other_color == color)
					continue;
				vm_page_verify_free_list(&vm_page_queue_free[other_color],
							 other_color, look_for_page, FALSE);
			}
			if (color == (unsigned int) -1) {
				vm_page_verify_free_list(&vm_lopage_queue_free,
							 (unsigned int) -1, look_for_page, FALSE);
			}
			panic("vm_page_verify_free_list(color=%u)\n", color);
		}
		if (!expect_page && found_page) {
			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
			       color, npages, look_for_page, look_for_page->phys_page);
		}
	}
	return npages;
}
static boolean_t vm_page_verify_free_lists_enabled = FALSE;
static void
vm_page_verify_free_lists( void )
{
	unsigned int	color, npages, nlopages;

	if (! vm_page_verify_free_lists_enabled)
		return;

	npages = 0;

	lck_mtx_lock(&vm_page_queue_free_lock);

	for( color = 0; color < vm_colors; color++ ) {
		npages += vm_page_verify_free_list(&vm_page_queue_free[color],
						   color, VM_PAGE_NULL, FALSE);
	}
	nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
					    (unsigned int) -1,
					    VM_PAGE_NULL, FALSE);
	if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
		panic("vm_page_verify_free_lists:  "
		      "npages %u free_count %d nlopages %u lo_free_count %u",
		      npages, vm_page_free_count, nlopages, vm_lopage_free_count);

	lck_mtx_unlock(&vm_page_queue_free_lock);
}
void
vm_page_queues_assert(
	vm_page_t	mem,
	int		val)
{
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if (mem->free + mem->active + mem->inactive + mem->speculative +
	    mem->throttled + mem->pageout_queue > (val)) {
		_vm_page_print(mem);
		panic("vm_page_queues_assert(%p, %d)\n", mem, val);
	}
	if (VM_PAGE_WIRED(mem)) {
		assert(!mem->active);
		assert(!mem->inactive);
		assert(!mem->speculative);
		assert(!mem->throttled);
		assert(!mem->pageout_queue);
	}
}
#endif	/* MACH_ASSERT */
/*
 *	CONTIGUOUS PAGE ALLOCATION
 *
 *	Find a region large enough to contain at least n pages
 *	of contiguous physical memory.
 *
 *	This is done by traversing the vm_page_t array in a linear fashion.
 *	We assume that the vm_page_t array has the available physical pages in an
 *	ordered, ascending list... this is currently true of all our implementations
 *	and must remain so... there can be 'holes' in the array... we also can
 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
 *	which used to happen via 'vm_page_convert'... that function was no longer
 *	being called and was removed.
 *
 *	The basic flow consists of stabilizing some of the interesting state of
 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 *	sweep at the beginning of the array looking for pages that meet our criteria
 *	for a 'stealable' page... currently we are pretty conservative... if the page
 *	meets this criteria and is physically contiguous to the previous page in the 'run'
 *	we keep developing it.  If we hit a page that doesn't fit, we reset our state
 *	and start to develop a new run... if at this point we've already considered
 *	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 *	and mutex_pause (which will yield the processor), to keep the latency low w/r
 *	to other threads trying to acquire free pages (or move pages from q to q),
 *	and then continue from the spot we left off... we only make 1 pass through the
 *	array.  Once we have a 'run' that is long enough, we'll go into the loop
 *	which steals the pages from the queues they're currently on... pages on the free
 *	queue can be stolen directly... pages that are on any of the other queues
 *	must be removed from the object they are tabled on... this requires taking the
 *	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
 *	dump the pages we've currently stolen back to the free list, and pick up our
 *	scan from the point where we aborted the 'current' run.
 *
 *	Requirements:
 *		- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 */

#define MAX_CONSIDERED_BEFORE_YIELD	1000


#define RESET_STATE_OF_RUN()	\
	MACRO_BEGIN		\
	prevcontaddr = -2;	\
	start_pnum = -1;	\
	free_considered = 0;	\
	substitute_needed = 0;	\
	npages = 0;		\
	MACRO_END

/*
 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages ?
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1

static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;

int vm_page_find_contig_debug = 0;
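
/*
 * Illustrative sketch (not part of the build): the core of the linear sweep
 * described above, reduced to finding a run of 'want' consecutive physical
 * page numbers in an ascending (but possibly gappy) array.  Real candidates
 * must additionally pass the stability checks made under the VM locks;
 * "sk_" names are placeholders.
 */
static int
sk_find_contig_run(const unsigned int *pfns, unsigned int count,
		   unsigned int want, unsigned int *run_start_out)
{
	unsigned int i, run_start = 0, run_len = 0;
	unsigned int prev = (unsigned int)-2;	/* can never be contiguous with pfns[0] */

	for (i = 0; i < count && run_len < want; i++) {
		if (pfns[i] == prev + 1) {
			run_len++;		/* extend the current run */
		} else {
			run_start = i;		/* gap: start a new run here */
			run_len = 1;
		}
		prev = pfns[i];
	}
	if (run_len < want)
		return 0;			/* no suitable run found */
	*run_start_out = run_start;
	return 1;
}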
static vm_page_t
vm_page_find_contiguous(
	unsigned int	contig_pages,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t	m = NULL;
	ppnum_t		prevcontaddr;
	ppnum_t		start_pnum = 0;
	unsigned int	npages, considered, scanned;
	unsigned int	page_idx, start_idx, last_idx, orig_last_idx;
	unsigned int	idx_last_contig_page_found = 0;
	int		free_considered, free_available;
	int		substitute_needed;
	boolean_t	wrapped;
	clock_sec_t	tv_start_sec, tv_end_sec;
	clock_usec_t	tv_start_usec, tv_end_usec;
	int		yielded = 0;
	int		dumped_run = 0;
	int		stolen_pages = 0;
	int		compressed_pages = 0;

	if (contig_pages == 0)
		return VM_PAGE_NULL;

	vm_page_verify_free_lists();

	clock_get_system_microtime(&tv_start_sec, &tv_start_usec);

	PAGE_REPLACEMENT_ALLOWED(TRUE);

	vm_page_lock_queues();
	lck_mtx_lock(&vm_page_queue_free_lock);

	RESET_STATE_OF_RUN();

	scanned = 0;
	considered = 0;
	free_available = vm_page_free_count - vm_page_free_reserved;

	wrapped = FALSE;

	if(flags & KMA_LOMEM)
		idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
	else
		idx_last_contig_page_found = vm_page_find_contiguous_last_idx;

	orig_last_idx = idx_last_contig_page_found;
	last_idx = orig_last_idx;

	for (page_idx = last_idx, start_idx = last_idx;
	     npages < contig_pages && page_idx < vm_pages_count;
	     page_idx++) {
retry:
		if (wrapped &&
		    npages == 0 &&
		    page_idx >= orig_last_idx) {
			/*
			 * We're back where we started and we haven't
			 * found any suitable contiguous range.  Let's
			 * give up.
			 */
			break;
		}
		scanned++;
		m = &vm_pages[page_idx];

		assert(!m->fictitious);
		assert(!m->private);

		if (max_pnum && m->phys_page > max_pnum) {
			/* no more low pages... */
			break;
		}
		if (!npages & ((m->phys_page & pnum_mask) != 0)) {
			/*
			 * not aligned
			 */
			RESET_STATE_OF_RUN();

		} else if (VM_PAGE_WIRED(m) || m->gobbled ||
			   m->encrypted_cleaning ||
			   m->pageout_queue || m->laundry || m->wanted ||
			   m->cleaning || m->overwriting || m->pageout) {
			/*
			 * page is in a transient state
			 * or a state we don't want to deal
			 * with, so don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
			/*
			 * page needs to be on one of our queues
			 * or it needs to belong to the compressor pool
			 * in order for it to be stable behind the
			 * locks we hold at this point...
			 * if not, don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else if (!m->free && (!m->tabled || m->busy)) {
			/*
			 * pages on the free list are always 'busy'
			 * so we couldn't test for 'busy' in the check
			 * for the transient states... pages that are
			 * 'free' are never 'tabled', so we also couldn't
			 * test for 'tabled'.  So we check here to make
			 * sure that a non-free page is not busy and is
			 * tabled on an object...
			 * if not, don't consider it which
			 * means starting a new run
			 */
			RESET_STATE_OF_RUN();

		} else {
			if (m->phys_page != prevcontaddr + 1) {
				if ((m->phys_page & pnum_mask) != 0) {
					RESET_STATE_OF_RUN();
					goto did_consider;
				} else {
					npages = 1;
					start_idx = page_idx;
					start_pnum = m->phys_page;
				}
			} else {
				npages++;
			}
			prevcontaddr = m->phys_page;

			if (m->free) {
				free_considered++;
			} else {
				/*
				 * This page is not free.
				 * If we can't steal used pages,
				 * we have to give up this run
				 * and keep looking.
				 * Otherwise, we might need to
				 * move the contents of this page
				 * into a substitute page.
				 */
#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
				if (m->pmapped || m->dirty || m->precious) {
					substitute_needed++;
				}
#else
				RESET_STATE_OF_RUN();
#endif
			}

			if ((free_considered + substitute_needed) > free_available) {
				/*
				 * if we let this run continue
				 * we will end up dropping the vm_page_free_count
				 * below the reserve limit... we need to abort
				 * this run, but we can at least re-consider this
				 * page... thus the jump back to 'retry'
				 */
				RESET_STATE_OF_RUN();

				if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
					considered++;
					goto retry;
				}
				/*
				 * free_available == 0
				 * so can't consider any free pages... if
				 * we went to retry in this case, we'd
				 * get stuck looking at the same page
				 * w/o making any forward progress
				 * we also want to take this path if we've already
				 * reached our limit that controls the lock latency
				 */
			}
		}
did_consider:
		if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {

			PAGE_REPLACEMENT_ALLOWED(FALSE);

			lck_mtx_unlock(&vm_page_queue_free_lock);
			vm_page_unlock_queues();

			mutex_pause(0);

			PAGE_REPLACEMENT_ALLOWED(TRUE);

			vm_page_lock_queues();
			lck_mtx_lock(&vm_page_queue_free_lock);

			RESET_STATE_OF_RUN();
			/*
			 * reset our free page limit since we
			 * dropped the lock protecting the vm_page_free_queue
			 */
			free_available = vm_page_free_count - vm_page_free_reserved;
			considered = 0;

			yielded++;

			goto retry;
		}
		considered++;
	}
	m = VM_PAGE_NULL;

	if (npages != contig_pages) {
		if (!wrapped) {
			/*
			 * We didn't find a contiguous range but we didn't
			 * start from the very first page.
			 * Start again from the very first page.
			 */
			RESET_STATE_OF_RUN();
			if( flags & KMA_LOMEM)
				idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
			else
				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
			last_idx = 0;
			page_idx = last_idx;
			wrapped = TRUE;
			goto retry;
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);
	} else {
		vm_page_t	m1;
		vm_page_t	m2;
		unsigned int	cur_idx;
		unsigned int	tmp_start_idx;
		vm_object_t	locked_object = VM_OBJECT_NULL;
		boolean_t	abort_run = FALSE;

		assert(page_idx - start_idx == contig_pages);

		tmp_start_idx = start_idx;

		/*
		 * first pass through to pull the free pages
		 * off of the free queue so that in case we
		 * need substitute pages, we won't grab any
		 * of the free pages in the run... we'll clear
		 * the 'free' bit in the 2nd pass, and even in
		 * an abort_run case, we'll collect all of the
		 * free pages in this run and return them to the free list
		 */
		while (start_idx < page_idx) {

			m1 = &vm_pages[start_idx++];

#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
			assert(m1->free);
#endif
			if (m1->free) {
				unsigned int color;

				color = m1->phys_page & vm_color_mask;

				vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);

				queue_remove(&vm_page_queue_free[color],
					     m1,
					     vm_page_t,
					     pageq);
				m1->pageq.next = NULL;
				m1->pageq.prev = NULL;

				vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);

				/*
				 * Clear the "free" bit so that this page
				 * does not get considered for another
				 * concurrent physically-contiguous allocation.
				 */
				m1->free = FALSE;

				vm_page_free_count--;
			}
		}
		/*
		 * adjust global freelist counts
		 */
		if (vm_page_free_count < vm_page_free_count_minimum)
			vm_page_free_count_minimum = vm_page_free_count;

		if( flags & KMA_LOMEM)
			vm_page_lomem_find_contiguous_last_idx = page_idx;
		else
			vm_page_find_contiguous_last_idx = page_idx;

		/*
		 * we can drop the free queue lock at this point since
		 * we've pulled any 'free' candidates off of the list
		 * we need it dropped so that we can do a vm_page_grab
		 * when substituing for pmapped/dirty pages
		 */
		lck_mtx_unlock(&vm_page_queue_free_lock);

		start_idx = tmp_start_idx;
		cur_idx = page_idx - 1;

		while (start_idx++ < page_idx) {
			/*
			 * must go through the list from back to front
			 * so that the page list is created in the
			 * correct order - low -> high phys addresses
			 */
			m1 = &vm_pages[cur_idx--];

			if (m1->object == VM_OBJECT_NULL) {
				/*
				 * page has already been removed from
				 * the free list in the 1st pass
				 */
				assert(m1->offset == (vm_object_offset_t) -1);
				assert(!m1->wanted);
				assert(!m1->laundry);
			} else {
				vm_object_t	object;
				int		refmod;
				boolean_t	disconnected, reusable;

				if (abort_run == TRUE)
					continue;

				object = m1->object;

				if (object != locked_object) {
					if (locked_object) {
						vm_object_unlock(locked_object);
						locked_object = VM_OBJECT_NULL;
					}
					if (vm_object_lock_try(object))
						locked_object = object;
				}
				if (locked_object == VM_OBJECT_NULL ||
				    (VM_PAGE_WIRED(m1) || m1->gobbled ||
				     m1->encrypted_cleaning ||
				     m1->pageout_queue || m1->laundry || m1->wanted ||
				     m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {

					if (locked_object) {
						vm_object_unlock(locked_object);
						locked_object = VM_OBJECT_NULL;
					}
					tmp_start_idx = cur_idx;
					abort_run = TRUE;
					continue;
				}

				disconnected = FALSE;
				reusable = FALSE;

				if ((m1->reusable ||
				     m1->object->all_reusable) &&
				    m1->inactive &&
				    !m1->dirty &&
				    !m1->reference) {
					/* reusable page... */
					refmod = pmap_disconnect(m1->phys_page);
					disconnected = TRUE;
					if (refmod == 0) {
						/*
						 * ... not reused: can steal
						 * without relocating contents.
						 */
						reusable = TRUE;
					}
				}

				if ((m1->pmapped && !reusable) ||
				    m1->dirty ||
				    m1->precious) {
					vm_object_offset_t offset;

					m2 = vm_page_grab();

					if (m2 == VM_PAGE_NULL) {
						if (locked_object) {
							vm_object_unlock(locked_object);
							locked_object = VM_OBJECT_NULL;
						}
						tmp_start_idx = cur_idx;
						abort_run = TRUE;
						continue;
					}
					if (! disconnected) {
						if (m1->pmapped)
							refmod = pmap_disconnect(m1->phys_page);
						else
							refmod = 0;
					}

					/* copy the page's contents */
					pmap_copy_page(m1->phys_page, m2->phys_page);
					/* copy the page's state */
					assert(!VM_PAGE_WIRED(m1));
					assert(!m1->pageout_queue);
					assert(!m1->laundry);
					m2->reference	= m1->reference;
					assert(!m1->gobbled);
					assert(!m1->private);
					m2->no_cache	= m1->no_cache;
					m2->xpmapped	= m1->xpmapped;
					assert(!m1->wanted);
					assert(!m1->fictitious);
					m2->pmapped	= m1->pmapped; /* should flush cache ? */
					m2->wpmapped	= m1->wpmapped;
					assert(!m1->pageout);
					m2->absent	= m1->absent;
					m2->error	= m1->error;
					m2->dirty	= m1->dirty;
					assert(!m1->cleaning);
					m2->precious	= m1->precious;
					m2->clustered	= m1->clustered;
					assert(!m1->overwriting);
					m2->restart	= m1->restart;
					m2->unusual	= m1->unusual;
					m2->encrypted	= m1->encrypted;
					assert(!m1->encrypted_cleaning);
					m2->cs_validated = m1->cs_validated;
					m2->cs_tainted	= m1->cs_tainted;

					/*
					 * If m1 had really been reusable,
					 * we would have just stolen it, so
					 * let's not propagate it's "reusable"
					 * bit and assert that m2 is not
					 * marked as "reusable".
					 */
					// m2->reusable	= m1->reusable;
					assert(!m2->reusable);

					assert(!m1->lopage);
					m2->slid	= m1->slid;
					m2->was_dirty	= m1->was_dirty;
					m2->compressor	= m1->compressor;

					/*
					 * make sure we clear the ref/mod state
					 * from the pmap layer... else we risk
					 * inheriting state from the last time
					 * this page was used...
					 */
					pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);

					if (refmod & VM_MEM_REFERENCED)
						m2->reference = TRUE;
					if (refmod & VM_MEM_MODIFIED) {
						SET_PAGE_DIRTY(m2, TRUE);
					}
					offset = m1->offset;

					/*
					 * completely cleans up the state
					 * of the page so that it is ready
					 * to be put onto the free list, or
					 * for this purpose it looks like it
					 * just came off of the free list
					 */
					vm_page_free_prepare(m1);

					/*
					 * now put the substitute page
					 * on the object
					 */
					vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);

					if (m2->compressor) {
						m2->pmapped = TRUE;
						m2->wpmapped = TRUE;

						PMAP_ENTER(kernel_pmap, m2->offset, m2,
							   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
						compressed_pages++;
					} else {
						if (m2->reference)
							vm_page_activate(m2);
						else
							vm_page_deactivate(m2);
					}
					PAGE_WAKEUP_DONE(m2);

				} else {
					assert(!m1->compressor);

					/*
					 * completely cleans up the state
					 * of the page so that it is ready
					 * to be put onto the free list, or
					 * for this purpose it looks like it
					 * just came off of the free list
					 */
					vm_page_free_prepare(m1);
				}
				stolen_pages++;
			}
			m1->pageq.next = (queue_entry_t) m;
			m1->pageq.prev = NULL;
			m = m1;
		}
		if (locked_object) {
			vm_object_unlock(locked_object);
			locked_object = VM_OBJECT_NULL;
		}

		if (abort_run == TRUE) {
			if (m != VM_PAGE_NULL) {
				vm_page_free_list(m, FALSE);
			}
			dumped_run++;

			/*
			 * want the index of the last
			 * page in this run that was
			 * successfully 'stolen', so back
			 * it up 1 for the auto-decrement on use
			 * and 1 more to bump back over this page
			 */
			page_idx = tmp_start_idx + 2;
			if (page_idx >= vm_pages_count) {
				if (wrapped)
					goto done_scanning;
				page_idx = last_idx = 0;
				wrapped = TRUE;
			}
			abort_run = FALSE;

			/*
			 * We didn't find a contiguous range but we didn't
			 * start from the very first page.
			 * Start again from the very first page.
			 */
			RESET_STATE_OF_RUN();

			if( flags & KMA_LOMEM)
				idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
			else
				idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;

			last_idx = page_idx;

			lck_mtx_lock(&vm_page_queue_free_lock);
			/*
			 * reset our free page limit since we
			 * dropped the lock protecting the vm_page_free_queue
			 */
			free_available = vm_page_free_count - vm_page_free_reserved;
			goto retry;
		}

		for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
			if (wire == TRUE)
				m1->wire_count++;
			else
				m1->gobbled = TRUE;
		}
		if (wire == FALSE)
			vm_page_gobble_count += npages;

		/*
		 * gobbled pages are also counted as wired pages
		 */
		vm_page_wire_count += npages;

		assert(vm_page_verify_contiguous(m, npages));
	}
done_scanning:
	PAGE_REPLACEMENT_ALLOWED(FALSE);

	vm_page_unlock_queues();

	clock_get_system_microtime(&tv_end_sec, &tv_end_usec);

	tv_end_sec -= tv_start_sec;
	if (tv_end_usec < tv_start_usec) {
		tv_end_sec--;
		tv_end_usec += 1000000;
	}
	tv_end_usec -= tv_start_usec;
	if (tv_end_usec >= 1000000) {
		tv_end_sec++;
		tv_end_usec -= 1000000;
	}
	if (vm_page_find_contig_debug) {
		printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds...  started at %d...  scanned %d pages...  yielded %d times...  dumped run %d times... stole %d pages... stole %d compressed pages\n",
		       __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
		       (long)tv_end_sec, tv_end_usec, orig_last_idx,
		       scanned, yielded, dumped_run, stolen_pages, compressed_pages);
	}

	vm_page_verify_free_lists();

	return m;
}
/*
 *	Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
	vm_size_t	size,
	vm_page_t	*list,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t	pages;
	unsigned int	npages;

	if (size % PAGE_SIZE != 0)
		return KERN_INVALID_ARGUMENT;

	npages = (unsigned int) (size / PAGE_SIZE);
	if (npages != size / PAGE_SIZE) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Obtain a pointer to a subset of the free
	 *	list large enough to satisfy the request;
	 *	the region will be physically contiguous.
	 */
	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

	if (pages == VM_PAGE_NULL)
		return KERN_NO_SPACE;
	/*
	 * determine need for wakeups
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

	/*
	 *	The CPM pages should now be available and
	 *	ordered by ascending physical address.
	 */
	assert(vm_page_verify_contiguous(pages, npages));

	*list = pages;

	return KERN_SUCCESS;
}
unsigned int	vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;

/*
 * when working on a 'run' of pages, it is necessary to hold
 * the vm_page_queue_lock (a hot global lock) for certain operations
 * on the page... however, the majority of the work can be done
 * while merely holding the object lock... in fact there are certain
 * collections of pages that don't require any work brokered by the
 * vm_page_queue_lock... to mitigate the time spent behind the global
 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
 * while doing all of the work that doesn't require the vm_page_queue_lock...
 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
 * necessary work for each page... we will grab the busy bit on the page
 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
 * if it can't immediately take the vm_page_queue_lock in order to compete
 * for the locks in the same order that vm_pageout_scan takes them.
 * the operation names are modeled after the names of the routines that
 * need to be called in order to make the changes very obvious in the code.
 */
void
vm_page_do_delayed_work(
	vm_object_t			object,
	struct vm_page_delayed_work	*dwp,
	int				dw_count)
{
	int		j;
	vm_page_t	m;
	vm_page_t	local_free_q = VM_PAGE_NULL;

	/*
	 * pageout_scan takes the vm_page_lock_queues first
	 * then tries for the object lock... to avoid what
	 * is effectively a lock inversion, we'll go to the
	 * trouble of taking them in that same order... otherwise
	 * if this object contains the majority of the pages resident
	 * in the UBC (or a small set of large objects actively being
	 * worked on contain the majority of the pages), we could
	 * cause the pageout_scan thread to 'starve' in its attempt
	 * to find pages to move to the free queue, since it has to
	 * successfully acquire the object lock of any candidate page
	 * before it can steal/clean it.
	 */
	if (!vm_page_trylockspin_queues()) {
		vm_object_unlock(object);

		vm_page_lockspin_queues();

		for (j = 0; ; j++) {
			if (!vm_object_lock_avoid(object) &&
			    _vm_object_lock_try(object))
				break;
			vm_page_unlock_queues();
			mutex_pause(j);
			vm_page_lockspin_queues();
		}
	}
	for (j = 0; j < dw_count; j++, dwp++) {

		m = dwp->dw_m;

		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
			vm_pageout_throttle_up(m);

		if (dwp->dw_mask & DW_vm_page_wire)
			vm_page_wire(m);
		else if (dwp->dw_mask & DW_vm_page_unwire) {
			boolean_t	queueit;

			queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;

			vm_page_unwire(m, queueit);
		}
		if (dwp->dw_mask & DW_vm_page_free) {
			vm_page_free_prepare_queues(m);

			assert(m->pageq.next == NULL && m->pageq.prev == NULL);
			/*
			 * Add this page to our list of reclaimed pages,
			 * to be freed later.
			 */
			m->pageq.next = (queue_entry_t) local_free_q;
			local_free_q = m;
		} else {
			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
				vm_page_deactivate_internal(m, FALSE);
			else if (dwp->dw_mask & DW_vm_page_activate) {
				if (m->active == FALSE) {
					vm_page_activate(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_speculate)
				vm_page_speculate(m, TRUE);
			else if (dwp->dw_mask & DW_enqueue_cleaned) {
				/*
				 * if we didn't hold the object lock and did this,
				 * we might disconnect the page, then someone might
				 * soft fault it back in, then we would put it on the
				 * cleaned queue, and so we would have a referenced (maybe even dirty)
				 * page on that queue, which we don't want
				 */
				int refmod_state = pmap_disconnect(m->phys_page);

				if ((refmod_state & VM_MEM_REFERENCED)) {
					/*
					 * this page has been touched since it got cleaned; let's activate it
					 * if it hasn't already been
					 */
					vm_pageout_enqueued_cleaned++;
					vm_pageout_cleaned_reactivated++;
					vm_pageout_cleaned_commit_reactivated++;

					if (m->active == FALSE)
						vm_page_activate(m);
				} else {
					m->reference = FALSE;
					vm_page_enqueue_cleaned(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_lru)
				vm_page_lru(m);
			else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
				if ( !m->pageout_queue)
					VM_PAGE_QUEUES_REMOVE(m);
			}
			if (dwp->dw_mask & DW_set_reference)
				m->reference = TRUE;
			else if (dwp->dw_mask & DW_clear_reference)
				m->reference = FALSE;

			if (dwp->dw_mask & DW_move_page) {
				if ( !m->pageout_queue) {
					VM_PAGE_QUEUES_REMOVE(m);

					assert(m->object != kernel_object);

					VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
				}
			}
			if (dwp->dw_mask & DW_clear_busy)
				m->busy = FALSE;

			if (dwp->dw_mask & DW_PAGE_WAKEUP)
				PAGE_WAKEUP(m);
		}
	}
	vm_page_unlock_queues();

	if (local_free_q)
		vm_page_free_list(local_free_q, TRUE);

	VM_CHECK_MEMORYSTATUS;
}
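
/*
 * Illustrative sketch, not part of the build, of the two-pass pattern the
 * comment above describes: work that needs only the object lock is done while
 * batching pages into a vm_page_delayed_work array, and the queue-lock work is
 * deferred to a single vm_page_do_delayed_work() call.  The batch size and the
 * way entries are filled in here are simplified assumptions; the real callers
 * use helper macros that are not part of this file.
 */
#if 0
static void
example_batch_deactivate(vm_object_t object, vm_page_t first)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	vm_page_t			m;

	vm_object_lock(object);

	for (m = first; m != VM_PAGE_NULL && dw_count < DEFAULT_DELAYED_WORK_LIMIT;
	     m = vm_page_get_next(m)) {
		/* pass 1: object-lock-only work; just record what each page needs */
		dwp->dw_m = m;
		dwp->dw_mask = DW_vm_page_deactivate_internal | DW_clear_busy | DW_PAGE_WAKEUP;
		dwp++;
		dw_count++;
	}
	/* pass 2: take the hot vm_page_queue_lock once for the whole batch */
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);

	vm_object_unlock(object);
}
#endif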
kern_return_t
vm_page_alloc_list(
	int		page_count,
	int		flags,
	vm_page_t	*list)
{
	vm_page_t	lo_page_list = VM_PAGE_NULL;
	vm_page_t	mem;
	int		i;

	if ( !(flags & KMA_LOMEM))
		panic("vm_page_alloc_list: called w/o KMA_LOMEM");

	for (i = 0; i < page_count; i++) {

		mem = vm_page_grablo();

		if (mem == VM_PAGE_NULL) {
			if (lo_page_list)
				vm_page_free_list(lo_page_list, FALSE);

			*list = VM_PAGE_NULL;

			return (KERN_RESOURCE_SHORTAGE);
		}
		mem->pageq.next = (queue_entry_t) lo_page_list;
		lo_page_list = mem;
	}
	*list = lo_page_list;

	return (KERN_SUCCESS);
}
void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
	page->offset = offset;
}

vm_page_t
vm_page_get_next(vm_page_t page)
{
	return ((vm_page_t) page->pageq.next);
}

vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
	return (page->offset);
}

ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
	return (page->phys_page);
}
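
/*
 * Illustrative sketch, not part of the build: walking a page list returned by
 * vm_page_alloc_list() with the accessors above.  The list is singly linked
 * through pageq.next, which is exactly what vm_page_get_next() returns; the
 * caller name and page_count are hypothetical.
 */
#if 0
static void
example_walk_lo_page_list(int page_count)
{
	vm_page_t	list, p;

	if (vm_page_alloc_list(page_count, KMA_LOMEM, &list) != KERN_SUCCESS)
		return;

	for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p)) {
		/* each entry exposes its physical page number */
		ppnum_t pnum = vm_page_get_phys_page(p);
		(void) pnum;
	}
	vm_page_free_list(list, FALSE);
}
#endif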
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#if HIBERNATION

static vm_page_t hibernate_gobble_queue;

extern boolean_t (* volatile consider_buffer_cache_collect)(int);

static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
static int  hibernate_flush_dirty_pages(int);
static int  hibernate_flush_queue(queue_head_t *, int);

void hibernate_flush_wait(void);
void hibernate_mark_in_progress(void);
void hibernate_clear_in_progress(void);

void		hibernate_free_range(int, int);
void		hibernate_hash_insert_page(vm_page_t);
uint32_t	hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
void		hibernate_rebuild_vm_structs(void);
uint32_t	hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
ppnum_t		hibernate_lookup_paddr(unsigned int);

struct hibernate_statistics {
	int hibernate_considered;
	int hibernate_reentered_on_q;
	int hibernate_found_dirty;
	int hibernate_skipped_cleaning;
	int hibernate_skipped_transient;
	int hibernate_skipped_precious;
	int hibernate_skipped_external;
	int hibernate_queue_nolock;
	int hibernate_queue_paused;
	int hibernate_throttled;
	int hibernate_throttle_timeout;
	int hibernate_drained;
	int hibernate_drain_timeout;
	int cd_lock_failed;
	int cd_found_precious;
	int cd_found_wired;
	int cd_found_busy;
	int cd_found_unusual;
	int cd_found_cleaning;
	int cd_found_laundry;
	int cd_found_dirty;
	int cd_found_xpmapped;
	int cd_local_free;
	int cd_total_free;
	int cd_vm_page_wire_count;
	int cd_vm_struct_pages_unneeded;
	int cd_pages;
	int cd_discarded;
	int cd_count_wire;
} hibernate_stats;
static int
hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
{
	wait_result_t	wait_result;

	vm_page_lock_queues();

	while ( !queue_empty(&q->pgo_pending) ) {

		q->pgo_draining = TRUE;

		assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);

		vm_page_unlock_queues();

		wait_result = thread_block(THREAD_CONTINUE_NULL);

		if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
			hibernate_stats.hibernate_drain_timeout++;

			if (q == &vm_pageout_queue_external)
				return (0);

			return (1);
		}
		vm_page_lock_queues();

		hibernate_stats.hibernate_drained++;
	}
	vm_page_unlock_queues();

	return (0);
}
boolean_t	hibernate_skip_external = FALSE;

static int
hibernate_flush_queue(queue_head_t *q, int qcount)
{
	vm_page_t	m;
	vm_object_t	l_object = NULL;
	vm_object_t	m_object = NULL;
	int		refmod_state = 0;
	int		try_failed_count = 0;
	int		retval = 0;
	int		current_run = 0;
	struct vm_pageout_queue *iq;
	struct vm_pageout_queue *eq;
	struct vm_pageout_queue *tq;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);

	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;

	vm_page_lock_queues();

	while (qcount && !queue_empty(q)) {

		if (current_run++ == 1000) {
			if (hibernate_should_abort()) {
				retval = 1;
				break;
			}
			current_run = 0;
		}

		m = (vm_page_t) queue_first(q);
		m_object = m->object;

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
			/*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			/*
			 * Try to lock object; since we've already got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run.
			 */
			if ( !vm_object_lock_try_scan(m_object)) {

				if (try_failed_count > 20) {
					hibernate_stats.hibernate_queue_nolock++;

					goto reenter_pg_on_q;
				}
				vm_pageout_scan_wants_object = m_object;

				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();

				hibernate_stats.hibernate_queue_paused++;
				continue;
			} else {
				l_object = m_object;
				vm_pageout_scan_wants_object = VM_OBJECT_NULL;
			}
		}
		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->cleaning)
				hibernate_stats.hibernate_skipped_cleaning++;
			else
				hibernate_stats.hibernate_skipped_transient++;

			goto reenter_pg_on_q;
		}
		if (m_object->copy == VM_OBJECT_NULL) {
			if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
				/*
				 * let the normal hibernate image path
				 * deal with it
				 */
				goto reenter_pg_on_q;
			}
		}
		if ( !m->dirty && m->pmapped) {
			refmod_state = pmap_get_refmod(m->phys_page);

			if ((refmod_state & VM_MEM_MODIFIED)) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		} else
			refmod_state = 0;

		if ( !m->dirty) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->precious)
				hibernate_stats.hibernate_skipped_precious++;

			goto reenter_pg_on_q;
		}

		if (hibernate_skip_external == TRUE && !m_object->internal) {

			hibernate_stats.hibernate_skipped_external++;

			goto reenter_pg_on_q;
		}
		tq = NULL;

		if (m_object->internal) {
			if (VM_PAGE_Q_THROTTLED(iq))
				tq = iq;
		} else if (VM_PAGE_Q_THROTTLED(eq))
			tq = eq;

		if (tq != NULL) {
			wait_result_t	wait_result;
			int		wait_count = 5;

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			while (retval == 0) {

				tq->pgo_throttled = TRUE;

				assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);

				vm_page_unlock_queues();

				wait_result = thread_block(THREAD_CONTINUE_NULL);

				vm_page_lock_queues();

				if (wait_result != THREAD_TIMED_OUT)
					break;
				if (!VM_PAGE_Q_THROTTLED(tq))
					break;

				if (hibernate_should_abort())
					retval = 1;

				if (--wait_count == 0) {

					hibernate_stats.hibernate_throttle_timeout++;

					if (tq == eq) {
						hibernate_skip_external = TRUE;
						break;
					}
					retval = 1;
				}
			}
			if (retval)
				break;

			hibernate_stats.hibernate_throttled++;

			continue;
		}
		/*
		 * we've already factored out pages in the laundry which
		 * means this page can't be on the pageout queue so it's
		 * safe to do the VM_PAGE_QUEUES_REMOVE
		 */
		assert(!m->pageout_queue);

		VM_PAGE_QUEUES_REMOVE(m);

		if (COMPRESSED_PAGER_IS_ACTIVE)
			pmap_disconnect(m->phys_page);

		vm_pageout_cluster(m, FALSE);

		hibernate_stats.hibernate_found_dirty++;

		goto next_pg;

reenter_pg_on_q:
		queue_remove(q, m, vm_page_t, pageq);
		queue_enter(q, m, vm_page_t, pageq);

		hibernate_stats.hibernate_reentered_on_q++;
next_pg:
		hibernate_stats.hibernate_considered++;

		qcount--;
		try_failed_count = 0;
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}
	vm_pageout_scan_wants_object = VM_OBJECT_NULL;

	vm_page_unlock_queues();

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);

	return (retval);
}
static int
hibernate_flush_dirty_pages(int pass)
{
	struct vm_speculative_age_q	*aq;
	uint32_t	i;

	bzero(&hibernate_stats, sizeof(struct hibernate_statistics));

	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, FALSE);
	}

	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
		int		qcount;
		vm_page_t	m;

		aq = &vm_page_queue_speculative[i];

		if (queue_empty(&aq->age_q))
			continue;
		qcount = 0;

		vm_page_lockspin_queues();

		queue_iterate(&aq->age_q,
			      m,
			      vm_page_t,
			      pageq)
		{
			qcount++;
		}
		vm_page_unlock_queues();

		if (qcount) {
			if (hibernate_flush_queue(&aq->age_q, qcount))
				return (1);
		}
	}
	if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
		return (1);
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
		return (1);

	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
		vm_compressor_record_warmup_start();

	if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
		vm_compressor_record_warmup_end();

	if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
		return (1);

	return (0);
}
int
hibernate_flush_memory()
{
	int	retval;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);

	hibernate_cleaning_in_progress = TRUE;
	hibernate_skip_external = FALSE;

	if ((retval = hibernate_flush_dirty_pages(1)) == 0) {

		if (COMPRESSED_PAGER_IS_ACTIVE) {

			if ((retval = hibernate_flush_dirty_pages(2)) == 0) {

				KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);

				vm_compressor_flush();

				KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
			}
		}
		if (retval == 0 && consider_buffer_cache_collect != NULL) {
			unsigned int orig_wire_count;

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
			orig_wire_count = vm_page_wire_count;

			(void)(*consider_buffer_cache_collect)(1);
			consider_zone_gc(TRUE);

			HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
		}
	}
	hibernate_cleaning_in_progress = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);

	if (retval && COMPRESSED_PAGER_IS_ACTIVE)
		HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);


	HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
		 hibernate_stats.hibernate_considered,
		 hibernate_stats.hibernate_reentered_on_q,
		 hibernate_stats.hibernate_found_dirty);
	HIBPRINT("   skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
		 hibernate_stats.hibernate_skipped_cleaning,
		 hibernate_stats.hibernate_skipped_transient,
		 hibernate_stats.hibernate_skipped_precious,
		 hibernate_stats.hibernate_skipped_external,
		 hibernate_stats.hibernate_queue_nolock);
	HIBPRINT("   queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
		 hibernate_stats.hibernate_queue_paused,
		 hibernate_stats.hibernate_throttled,
		 hibernate_stats.hibernate_throttle_timeout,
		 hibernate_stats.hibernate_drained,
		 hibernate_stats.hibernate_drain_timeout);

	return (retval);
}
void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
    uint32_t             bank;
    hibernate_bitmap_t * bitmap;

    bitmap = &list->bank_bitmap[0];
    for (bank = 0; bank < list->bank_count; bank++)
    {
	uint32_t last_bit;

	bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
	// set out-of-bound bits at end of bitmap.
	last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
	if (last_bit)
	    bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

	bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
    }
}
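
/*
 * Worked example, not part of the build, of the out-of-bound masking above.
 * For a hypothetical bank covering 70 pages, last_bit is 70 & 31 == 6: the
 * last 32-bit word holds 6 valid pages (in its high-order bits, assuming the
 * MSB-first bit numbering used by hibernate_page_bitset()), so the low 26
 * out-of-range bits are preset to 1, meaning "does not need saving".
 */
#if 0
static uint32_t
example_last_word_mask(uint32_t first_page, uint32_t last_page)
{
	uint32_t last_bit = ((last_page - first_page + 1) & 31);	/* 70 & 31 == 6 */

	return last_bit ? (0xFFFFFFFF >> last_bit) : 0;			/* 0x03FFFFFF */
}
#endif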
void
hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
{
    unsigned int i;
    vm_page_t m;
    uint64_t start, end, timeout, nsec;
    clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
    clock_get_uptime(&start);

    for (i = 0; i < gobble_count; i++)
    {
	while (VM_PAGE_NULL == (m = vm_page_grab()))
	{
	    clock_get_uptime(&end);
	    if (end >= timeout)
		break;
	    VM_PAGE_WAIT();
	}
	if (!m)
	    break;
	m->busy = FALSE;
	vm_page_gobble(m);

	m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
	hibernate_gobble_queue = m;
    }

    clock_get_uptime(&end);
    absolutetime_to_nanoseconds(end - start, &nsec);
    HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
}

void
hibernate_free_gobble_pages(void)
{
    vm_page_t m, next;
    uint32_t  count = 0;

    m = (vm_page_t) hibernate_gobble_queue;
    while (m)
    {
	next = (vm_page_t) m->pageq.next;
	vm_page_free(m);
	count++;
	m = next;
    }
    hibernate_gobble_queue = VM_PAGE_NULL;

    if (count)
	HIBLOG("Freed %d pages\n", count);
}
static boolean_t
hibernate_consider_discard(vm_page_t m, boolean_t preflight)
{
    vm_object_t object = NULL;
    int         refmod_state;
    boolean_t   discard = FALSE;

    do
    {
	if (m->private)
	    panic("hibernate_consider_discard: private");

	if (!vm_object_lock_try(m->object)) {
	    if (!preflight) hibernate_stats.cd_lock_failed++;
	    break;
	}
	object = m->object;

	if (VM_PAGE_WIRED(m)) {
	    if (!preflight) hibernate_stats.cd_found_wired++;
	    break;
	}
	if (m->precious) {
	    if (!preflight) hibernate_stats.cd_found_precious++;
	    break;
	}
	if (m->busy || !object->alive) {
	    /*
	     * Somebody is playing with this page.
	     */
	    if (!preflight) hibernate_stats.cd_found_busy++;
	    break;
	}
	if (m->absent || m->unusual || m->error) {
	    /*
	     * If it's unusual in any way, ignore it
	     */
	    if (!preflight) hibernate_stats.cd_found_unusual++;
	    break;
	}
	if (m->cleaning) {
	    if (!preflight) hibernate_stats.cd_found_cleaning++;
	    break;
	}
	if (m->laundry) {
	    if (!preflight) hibernate_stats.cd_found_laundry++;
	    break;
	}
	if (!m->dirty)
	{
	    refmod_state = pmap_get_refmod(m->phys_page);

	    if (refmod_state & VM_MEM_REFERENCED)
		m->reference = TRUE;
	    if (refmod_state & VM_MEM_MODIFIED) {
		SET_PAGE_DIRTY(m, FALSE);
	    }
	}

	/*
	 * If it's clean or purgeable we can discard the page on wakeup.
	 */
	discard = (!m->dirty)
		    || (VM_PURGABLE_VOLATILE == object->purgable)
		    || (VM_PURGABLE_EMPTY == object->purgable);

	if (discard == FALSE) {
	    if (!preflight)
		hibernate_stats.cd_found_dirty++;
	} else if (m->xpmapped && m->reference) {
	    if (!preflight)
		hibernate_stats.cd_found_xpmapped++;
	}
    }
    while (FALSE);

    if (object)
	vm_object_unlock(object);

    return (discard);
}
static void
hibernate_discard_page(vm_page_t m)
{
    if (m->absent || m->unusual || m->error)
	/*
	 * If it's unusual in any way, ignore
	 */
	return;

#if MACH_ASSERT || DEBUG
    vm_object_t object = m->object;
    if (!vm_object_lock_try(m->object))
	panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
#else
    /* No need to lock page queue for token delete, hibernate_vm_unlock()
       makes sure these locks are uncontended before sleep */
#endif /* MACH_ASSERT || DEBUG */

    if (m->pmapped == TRUE)
    {
	__unused int refmod_state = pmap_disconnect(m->phys_page);
    }

    if (m->laundry)
	panic("hibernate_discard_page(%p) laundry", m);
    if (m->private)
	panic("hibernate_discard_page(%p) private", m);
    if (m->fictitious)
	panic("hibernate_discard_page(%p) fictitious", m);

    if (VM_PURGABLE_VOLATILE == m->object->purgable)
    {
	/* object should be on a queue */
	assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
	purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
	assert(old_queue);
	if (m->object->purgeable_when_ripe) {
	    vm_purgeable_token_delete_first(old_queue);
	}
	m->object->purgable = VM_PURGABLE_EMPTY;
    }

    vm_page_free(m);

#if MACH_ASSERT || DEBUG
    vm_object_unlock(object);
#endif /* MACH_ASSERT || DEBUG */
}
/*
 Grab locks for hibernate_page_list_setall()
*/
void
hibernate_vm_lock_queues(void)
{
    vm_object_lock(compressor_object);
    vm_page_lock_queues();
    lck_mtx_lock(&vm_page_queue_free_lock);

    if (vm_page_local_q) {
	uint32_t i;
	for (i = 0; i < vm_page_local_q_count; i++) {
	    struct vpl *lq;
	    lq = &vm_page_local_q[i].vpl_un.vpl;
	    VPL_LOCK(&lq->vpl_lock);
	}
    }
}

void
hibernate_vm_unlock_queues(void)
{
    if (vm_page_local_q) {
	uint32_t i;
	for (i = 0; i < vm_page_local_q_count; i++) {
	    struct vpl *lq;
	    lq = &vm_page_local_q[i].vpl_un.vpl;
	    VPL_UNLOCK(&lq->vpl_lock);
	}
    }
    lck_mtx_unlock(&vm_page_queue_free_lock);
    vm_page_unlock_queues();
    vm_object_unlock(compressor_object);
}
/*
 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
 pages known to VM to not need saving are subtracted.
 Wired pages to be saved are present in page_list_wired, pageable in page_list.
*/

void
hibernate_page_list_setall(hibernate_page_list_t * page_list,
			   hibernate_page_list_t * page_list_wired,
			   hibernate_page_list_t * page_list_pal,
			   boolean_t preflight,
			   boolean_t will_discard,
			   uint32_t * pagesOut)
{
    uint64_t start, end, nsec;
    vm_page_t m;
    vm_page_t next;
    uint32_t i;
    uint32_t pages = page_list->page_count;
    uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
    uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
    uint32_t count_wire = pages;
    uint32_t count_discard_active    = 0;
    uint32_t count_discard_inactive  = 0;
    uint32_t count_discard_cleaned   = 0;
    uint32_t count_discard_purgeable = 0;
    uint32_t count_discard_speculative = 0;
    uint32_t count_discard_vm_struct_pages = 0;
    uint32_t             bank;
    hibernate_bitmap_t * bitmap;
    hibernate_bitmap_t * bitmap_wired;
    boolean_t            discard_all;
    boolean_t            discard;

    HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);

    if (preflight) {
	page_list = NULL;
	page_list_wired = NULL;
	page_list_pal = NULL;
	discard_all = FALSE;
    } else {
	discard_all = will_discard;
    }

#if MACH_ASSERT || DEBUG
    if (!preflight)
    {
	vm_page_lock_queues();
	if (vm_page_local_q) {
	    for (i = 0; i < vm_page_local_q_count; i++) {
		struct vpl *lq;
		lq = &vm_page_local_q[i].vpl_un.vpl;
		VPL_LOCK(&lq->vpl_lock);
	    }
	}
    }
#endif /* MACH_ASSERT || DEBUG */

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);

    clock_get_uptime(&start);

    if (!preflight) {
	hibernate_page_list_zero(page_list);
	hibernate_page_list_zero(page_list_wired);
	hibernate_page_list_zero(page_list_pal);

	hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
	hibernate_stats.cd_pages = pages;
    }

    if (vm_page_local_q) {
	for (i = 0; i < vm_page_local_q_count; i++)
	    vm_page_reactivate_local(i, TRUE, !preflight);
    }

    if (preflight) {
	vm_object_lock(compressor_object);
	vm_page_lock_queues();
	lck_mtx_lock(&vm_page_queue_free_lock);
    }

    m = (vm_page_t) hibernate_gobble_queue;
    while (m)
    {
	pages--;
	count_wire--;
	if (!preflight) {
	    hibernate_page_bitset(page_list, TRUE, m->phys_page);
	    hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	}
	m = (vm_page_t) m->pageq.next;
    }

    if (!preflight) for( i = 0; i < real_ncpus; i++ )
    {
	if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
	{
	    for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
	    {
		pages--;
		count_wire--;
		hibernate_page_bitset(page_list, TRUE, m->phys_page);
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

		hibernate_stats.cd_local_free++;
		hibernate_stats.cd_total_free++;
	    }
	}
    }

    for( i = 0; i < vm_colors; i++ )
    {
	queue_iterate(&vm_page_queue_free[i],
		      m,
		      vm_page_t,
		      pageq)
	{
	    pages--;
	    count_wire--;
	    if (!preflight) {
		hibernate_page_bitset(page_list, TRUE, m->phys_page);
		hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

		hibernate_stats.cd_total_free++;
	    }
	}
    }

    queue_iterate(&vm_lopage_queue_free,
		  m,
		  vm_page_t,
		  pageq)
    {
	pages--;
	count_wire--;
	if (!preflight) {
	    hibernate_page_bitset(page_list, TRUE, m->phys_page);
	    hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

	    hibernate_stats.cd_total_free++;
	}
    }

    m = (vm_page_t) queue_first(&vm_page_queue_throttled);
    while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
    {
	next = (vm_page_t) m->pageq.next;
	discard = FALSE;
	if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
	 && hibernate_consider_discard(m, preflight))
	{
	    if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
	    count_discard_inactive++;
	    discard = discard_all;
	}
	else
	    count_throttled++;
	count_wire--;
	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

	if (discard) hibernate_discard_page(m);
	m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
    while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
    {
	next = (vm_page_t) m->pageq.next;
	discard = FALSE;
	if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
	 && hibernate_consider_discard(m, preflight))
	{
	    if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
	    if (m->dirty)
		count_discard_purgeable++;
	    else
		count_discard_inactive++;
	    discard = discard_all;
	}
	else
	    count_anonymous++;
	count_wire--;
	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	if (discard) hibernate_discard_page(m);
	m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_inactive);
    while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
    {
	next = (vm_page_t) m->pageq.next;
	discard = FALSE;
	if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
	 && hibernate_consider_discard(m, preflight))
	{
	    if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
	    if (m->dirty)
		count_discard_purgeable++;
	    else
		count_discard_inactive++;
	    discard = discard_all;
	}
	else
	    count_inactive++;
	count_wire--;
	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	if (discard) hibernate_discard_page(m);
	m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
    while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
    {
	next = (vm_page_t) m->pageq.next;
	discard = FALSE;
	if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
	 && hibernate_consider_discard(m, preflight))
	{
	    if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
	    if (m->dirty)
		count_discard_purgeable++;
	    else
		count_discard_cleaned++;
	    discard = discard_all;
	}
	else
	    count_cleaned++;
	count_wire--;
	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	if (discard) hibernate_discard_page(m);
	m = next;
    }

    for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
    {
	m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
	while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
	{
	    next = (vm_page_t) m->pageq.next;
	    discard = FALSE;
	    if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
	     && hibernate_consider_discard(m, preflight))
	    {
		if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
		count_discard_speculative++;
		discard = discard_all;
	    }
	    else
		count_speculative++;
	    count_wire--;
	    if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	    if (discard) hibernate_discard_page(m);
	    m = next;
	}
    }

    m = (vm_page_t) queue_first(&vm_page_queue_active);
    while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
    {
	next = (vm_page_t) m->pageq.next;
	discard = FALSE;
	if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
	 && hibernate_consider_discard(m, preflight))
	{
	    if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
	    if (m->dirty)
		count_discard_purgeable++;
	    else
		count_discard_active++;
	    discard = discard_all;
	}
	else
	    count_active++;
	count_wire--;
	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
	if (discard) hibernate_discard_page(m);
	m = next;
    }

    queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
    {
	count_compressor++;
	count_wire--;
	if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
    }

    if (preflight == FALSE && discard_all == TRUE) {
	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);

	HIBLOG("hibernate_teardown started\n");
	count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
	HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);

	pages -= count_discard_vm_struct_pages;
	count_wire -= count_discard_vm_struct_pages;

	hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
    }

    if (!preflight) {
	// pull wired from hibernate_bitmap
	bitmap = &page_list->bank_bitmap[0];
	bitmap_wired = &page_list_wired->bank_bitmap[0];
	for (bank = 0; bank < page_list->bank_count; bank++)
	{
	    for (i = 0; i < bitmap->bitmapwords; i++)
		bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
	    bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
	    bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
	}
    }

    // machine dependent adjustments
    hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);

    if (!preflight) {
	hibernate_stats.cd_count_wire = count_wire;
	hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
					count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
    }

    clock_get_uptime(&end);
    absolutetime_to_nanoseconds(end - start, &nsec);
    HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);

    HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n   %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
	   pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
	   discard_all ? "did" : "could",
	   count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);

    *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;

    if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;

#if MACH_ASSERT || DEBUG
    if (!preflight)
    {
	if (vm_page_local_q) {
	    for (i = 0; i < vm_page_local_q_count; i++) {
		struct vpl *lq;
		lq = &vm_page_local_q[i].vpl_un.vpl;
		VPL_UNLOCK(&lq->vpl_lock);
	    }
	}
	vm_page_unlock_queues();
    }
#endif /* MACH_ASSERT || DEBUG */

    if (preflight) {
	lck_mtx_unlock(&vm_page_queue_free_lock);
	vm_page_unlock_queues();
	vm_object_unlock(compressor_object);
    }

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
}
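
/*
 * Worked example, not part of the build, of the "pull wired from
 * hibernate_bitmap" step above.  With the convention that a ZERO bit means
 * "page must be saved", OR-ing in the complement of the wired bitmap removes
 * from the pageable list every page the wired list already claims, so no page
 * is written to the image twice.  The word values below are made up.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t page_list_word  = 0x0F0F0F0F;	/* 0 bits: pageable pages to save */
	uint32_t wired_list_word = 0xFF00FF00;	/* 0 bits: wired pages to save    */

	/* same operation as: bitmap->bitmap[i] |= ~bitmap_wired->bitmap[i]; */
	page_list_word |= ~wired_list_word;

	/* any page the wired list saves (0 bit there) is now 1 (skipped) here */
	printf("combined pageable word: 0x%08X\n", page_list_word);	/* 0x0FFF0FFF */
	return 0;
}
#endif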
void
hibernate_page_list_discard(hibernate_page_list_t * page_list)
{
    uint64_t  start, end, nsec;
    vm_page_t m;
    vm_page_t next;
    uint32_t  i;
    uint32_t  count_discard_active    = 0;
    uint32_t  count_discard_inactive  = 0;
    uint32_t  count_discard_purgeable = 0;
    uint32_t  count_discard_cleaned   = 0;
    uint32_t  count_discard_speculative = 0;

#if MACH_ASSERT || DEBUG
    vm_page_lock_queues();
    if (vm_page_local_q) {
	for (i = 0; i < vm_page_local_q_count; i++) {
	    struct vpl *lq;
	    lq = &vm_page_local_q[i].vpl_un.vpl;
	    VPL_LOCK(&lq->vpl_lock);
	}
    }
#endif /* MACH_ASSERT || DEBUG */

    clock_get_uptime(&start);

    m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
    while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
    {
	next = (vm_page_t) m->pageq.next;
	if (hibernate_page_bittst(page_list, m->phys_page))
	{
	    if (m->dirty)
		count_discard_purgeable++;
	    else
		count_discard_inactive++;
	    hibernate_discard_page(m);
	}
	m = next;
    }

    for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
    {
	m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
	while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
	{
	    next = (vm_page_t) m->pageq.next;
	    if (hibernate_page_bittst(page_list, m->phys_page))
	    {
		count_discard_speculative++;
		hibernate_discard_page(m);
	    }
	    m = next;
	}
    }

    m = (vm_page_t) queue_first(&vm_page_queue_inactive);
    while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
    {
	next = (vm_page_t) m->pageq.next;
	if (hibernate_page_bittst(page_list, m->phys_page))
	{
	    if (m->dirty)
		count_discard_purgeable++;
	    else
		count_discard_inactive++;
	    hibernate_discard_page(m);
	}
	m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_active);
    while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
    {
	next = (vm_page_t) m->pageq.next;
	if (hibernate_page_bittst(page_list, m->phys_page))
	{
	    if (m->dirty)
		count_discard_purgeable++;
	    else
		count_discard_active++;
	    hibernate_discard_page(m);
	}
	m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
    while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
    {
	next = (vm_page_t) m->pageq.next;
	if (hibernate_page_bittst(page_list, m->phys_page))
	{
	    if (m->dirty)
		count_discard_purgeable++;
	    else
		count_discard_cleaned++;
	    hibernate_discard_page(m);
	}
	m = next;
    }

#if MACH_ASSERT || DEBUG
    if (vm_page_local_q) {
	for (i = 0; i < vm_page_local_q_count; i++) {
	    struct vpl *lq;
	    lq = &vm_page_local_q[i].vpl_un.vpl;
	    VPL_UNLOCK(&lq->vpl_lock);
	}
    }
    vm_page_unlock_queues();
#endif /* MACH_ASSERT || DEBUG */

    clock_get_uptime(&end);
    absolutetime_to_nanoseconds(end - start, &nsec);
    HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
	   nsec / 1000000ULL,
	   count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
}
boolean_t	hibernate_paddr_map_inited = FALSE;
boolean_t	hibernate_rebuild_needed = FALSE;
unsigned int	hibernate_teardown_last_valid_compact_indx = -1;
vm_page_t	hibernate_rebuild_hash_list = NULL;

unsigned int	hibernate_teardown_found_tabled_pages = 0;
unsigned int	hibernate_teardown_found_created_pages = 0;
unsigned int	hibernate_teardown_found_free_pages = 0;
unsigned int	hibernate_teardown_vm_page_free_count;


struct ppnum_mapping {
	struct ppnum_mapping	*ppnm_next;
	ppnum_t			ppnm_base_paddr;
	unsigned int		ppnm_sindx;
	unsigned int		ppnm_eindx;
};

struct ppnum_mapping	*ppnm_head;
struct ppnum_mapping	*ppnm_last_found = NULL;
void
hibernate_create_paddr_map()
{
	unsigned int	i;
	ppnum_t		next_ppnum_in_run = 0;
	struct ppnum_mapping *ppnm = NULL;

	if (hibernate_paddr_map_inited == FALSE) {

		for (i = 0; i < vm_pages_count; i++) {

			if (ppnm)
				ppnm->ppnm_eindx = i;

			if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {

				ppnm = kalloc(sizeof(struct ppnum_mapping));

				ppnm->ppnm_next = ppnm_head;
				ppnm_head = ppnm;

				ppnm->ppnm_sindx = i;
				ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
			}
			next_ppnum_in_run = vm_pages[i].phys_page + 1;
		}
		ppnm->ppnm_eindx++;

		hibernate_paddr_map_inited = TRUE;
	}
}
ppnum_t
hibernate_lookup_paddr(unsigned int indx)
{
	struct ppnum_mapping *ppnm = NULL;

	ppnm = ppnm_last_found;

	if (ppnm) {
		if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
			goto done;
	}
	for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {

		if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
			ppnm_last_found = ppnm;
			break;
		}
	}
	if (ppnm == NULL)
		panic("hibernate_lookup_paddr of %d failed\n", indx);
done:
	return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
}
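
/*
 * Worked example, not part of the build: how a ppnum_mapping run translates a
 * vm_pages[] index back to a physical page number.  The run values are made
 * up: a run covering indices [100, 164) whose first physical page is 0x8000
 * maps index 130 to 0x8000 + (130 - 100) = 0x801E.
 */
#if 0
static ppnum_t
example_run_lookup(void)
{
	struct ppnum_mapping run = {
		.ppnm_next       = NULL,
		.ppnm_base_paddr = 0x8000,
		.ppnm_sindx      = 100,
		.ppnm_eindx      = 164,
	};
	unsigned int indx = 130;

	/* same arithmetic as the return statement of hibernate_lookup_paddr() */
	return run.ppnm_base_paddr + (indx - run.ppnm_sindx);
}
#endif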
uint32_t
hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
	addr64_t	saddr_aligned;
	addr64_t	eaddr_aligned;
	addr64_t	addr;
	ppnum_t		paddr;
	unsigned int	mark_as_unneeded_pages = 0;

	saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
	eaddr_aligned = eaddr & ~PAGE_MASK_64;

	for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {

		paddr = pmap_find_phys(kernel_pmap, addr);

		assert(paddr);

		hibernate_page_bitset(page_list,       TRUE, paddr);
		hibernate_page_bitset(page_list_wired, TRUE, paddr);

		mark_as_unneeded_pages++;
	}
	return (mark_as_unneeded_pages);
}
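
/*
 * Worked example, not part of the build, of the alignment above: only pages
 * that lie entirely inside [saddr, eaddr) are marked unneeded, so the start is
 * rounded up and the end rounded down to page boundaries.  The addresses are
 * made up and assume 4 KB pages.
 */
#if 0
#include <stdint.h>

#define EX_PAGE_MASK_64 0xFFFULL	/* 4 KB pages for the example */

static void
example_align(void)
{
	uint64_t saddr = 0x10000800ULL;		/* mid-page start */
	uint64_t eaddr = 0x10004800ULL;		/* mid-page end   */

	uint64_t saddr_aligned = (saddr + EX_PAGE_MASK_64) & ~EX_PAGE_MASK_64;	/* 0x10001000 */
	uint64_t eaddr_aligned = eaddr & ~EX_PAGE_MASK_64;			/* 0x10004000 */

	/* pages at 0x10001000, 0x10002000, 0x10003000 get marked: 3 pages */
	(void) saddr_aligned;
	(void) eaddr_aligned;
}
#endif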
void
hibernate_hash_insert_page(vm_page_t mem)
{
	vm_page_bucket_t *bucket;
	int		hash_id;

	assert(mem->tabled);
	assert(mem->object);
	assert(mem->offset != (vm_object_offset_t) -1);

	/*
	 *	Insert it into the object/offset hash table
	 */
	hash_id = vm_page_hash(mem->object, mem->offset);
	bucket = &vm_page_buckets[hash_id];

	mem->next = bucket->pages;
	bucket->pages = mem;
}
void
hibernate_free_range(int sindx, int eindx)
{
	vm_page_t	mem;
	unsigned int	color;

	while (sindx < eindx) {
		mem = &vm_pages[sindx];

		vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);

		mem->lopage = FALSE;
		mem->free = TRUE;

		color = mem->phys_page & vm_color_mask;
		queue_enter_first(&vm_page_queue_free[color],
				  mem,
				  vm_page_t,
				  pageq);
		vm_page_free_count++;

		sindx++;
	}
}
extern void hibernate_rebuild_pmap_structs(void);

void
hibernate_rebuild_vm_structs(void)
{
	int		cindx, sindx, eindx;
	vm_page_t	mem, tmem, mem_next;
	AbsoluteTime	startTime, endTime;
	uint64_t	nsec;

	if (hibernate_rebuild_needed == FALSE)
		return;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
	HIBLOG("hibernate_rebuild started\n");

	clock_get_uptime(&startTime);

	hibernate_rebuild_pmap_structs();

	bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
	eindx = vm_pages_count;

	for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {

		mem = &vm_pages[cindx];
		/*
		 * hibernate_teardown_vm_structs leaves the location where
		 * this vm_page_t must be located in "next".
		 */
		tmem = (vm_page_t)mem->next;
		mem->next = NULL;

		sindx = (int)(tmem - &vm_pages[0]);

		if (mem != tmem) {
			/*
			 * this vm_page_t was moved by hibernate_teardown_vm_structs,
			 * so move it back to its real location
			 */
			*tmem = *mem;
			mem = tmem;
		}
		if (mem->tabled)
			hibernate_hash_insert_page(mem);
		/*
		 * the 'hole' between this vm_page_t and the previous
		 * vm_page_t we moved needs to be initialized as
		 * a range of free vm_page_t's
		 */
		hibernate_free_range(sindx + 1, eindx);

		eindx = sindx;
	}
	if (sindx)
		hibernate_free_range(0, sindx);

	assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);

	/*
	 * process the list of vm_page_t's that were tabled in the hash,
	 * but were not located in the vm_pages array... these are
	 * vm_page_t's that were created on the fly (i.e. fictitious)
	 */
	for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
		mem_next = mem->next;

		mem->next = NULL;
		hibernate_hash_insert_page(mem);
	}
	hibernate_rebuild_hash_list = NULL;

	clock_get_uptime(&endTime);
	SUB_ABSOLUTETIME(&endTime, &startTime);
	absolutetime_to_nanoseconds(endTime, &nsec);

	HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);

	hibernate_rebuild_needed = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
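
/*
 * Miniature sketch, not part of the build, of the compaction idea used by
 * hibernate_teardown_vm_structs() and undone by hibernate_rebuild_vm_structs():
 * occupied entries are copied down into free slots so the tail of the array
 * can be dropped from the image, while each moved entry remembers its original
 * slot so the rebuild pass can put it back.  Plain ints stand in for
 * vm_page_t's; the array contents are made up.
 */
#if 0
#include <stddef.h>

#define SLOTS 8

struct slot { int in_use; int value; size_t home; };

static size_t
example_compact(struct slot a[SLOTS])
{
	size_t target = 0, last_valid = 0;

	for (size_t i = 0; i < SLOTS; i++) {
		if (!a[i].in_use) {
			if (a[target].in_use)
				target = i;		/* first hole to fill */
			continue;
		}
		a[i].home = i;				/* remember the original slot */
		if (!a[target].in_use) {
			a[target] = a[i];		/* fill the hole */
			a[i].in_use = 0;
			last_valid = target++;
		} else
			last_valid = i;
	}
	return last_valid;	/* everything past this index can be discarded */
}
#endif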
extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);

uint32_t
hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
{
	unsigned int	i;
	unsigned int	compact_target_indx;
	vm_page_t	mem, mem_next;
	vm_page_bucket_t *bucket;
	unsigned int	mark_as_unneeded_pages = 0;
	unsigned int	unneeded_vm_page_bucket_pages = 0;
	unsigned int	unneeded_vm_pages_pages = 0;
	unsigned int	unneeded_pmap_pages = 0;
	addr64_t	start_of_unneeded = 0;
	addr64_t	end_of_unneeded = 0;


	if (hibernate_should_abort())
		return (0);

	HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
	       vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
	       vm_page_cleaned_count, compressor_object->resident_page_count);

	for (i = 0; i < vm_page_bucket_count; i++) {

		bucket = &vm_page_buckets[i];

		for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem_next) {

			assert(mem->tabled);

			mem_next = mem->next;

			if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
				mem->next = hibernate_rebuild_hash_list;
				hibernate_rebuild_hash_list = mem;
			}
		}
	}
	unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;

	hibernate_teardown_vm_page_free_count = vm_page_free_count;

	compact_target_indx = 0;

	for (i = 0; i < vm_pages_count; i++) {

		mem = &vm_pages[i];

		if (mem->free) {
			unsigned int color;

			assert(!mem->lopage);

			color = mem->phys_page & vm_color_mask;

			queue_remove(&vm_page_queue_free[color],
				     mem,
				     vm_page_t,
				     pageq);
			mem->pageq.next = NULL;
			mem->pageq.prev = NULL;

			vm_page_free_count--;

			hibernate_teardown_found_free_pages++;

			if ( !vm_pages[compact_target_indx].free)
				compact_target_indx = i;
		} else {
			/*
			 * record this vm_page_t's original location
			 * we need this even if it doesn't get moved
			 * as an indicator to the rebuild function that
			 * we don't have to move it
			 */
			mem->next = mem;

			if (vm_pages[compact_target_indx].free) {
				/*
				 * we've got a hole to fill, so
				 * move this vm_page_t to its new home
				 */
				vm_pages[compact_target_indx] = *mem;
				mem->free = TRUE;

				hibernate_teardown_last_valid_compact_indx = compact_target_indx;
				compact_target_indx++;
			} else
				hibernate_teardown_last_valid_compact_indx = i;
		}
	}
	unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
							     (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_pages_pages;

	hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);

	if (start_of_unneeded) {
		unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
		mark_as_unneeded_pages += unneeded_pmap_pages;
	}
	HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);

	hibernate_rebuild_needed = TRUE;

	return (mark_as_unneeded_pages);
}
#endif /* HIBERNATION */

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *	Routine:	vm_page_info
 *	Purpose:
 *		Return information about the global VP table.
 *		Fills the buffer with as much information as possible
 *		and returns the desired size of the buffer.
 *	Conditions:
 *		Nothing locked.  The caller should provide
 *		possibly-pageable memory.
 */

unsigned int
vm_page_info(
	hash_info_bucket_t	*info,
	unsigned int		count)
{
	unsigned int	i;
	lck_spin_t	*bucket_lock;

	if (vm_page_bucket_count < count)
		count = vm_page_bucket_count;

	for (i = 0; i < count; i++) {
		vm_page_bucket_t *bucket = &vm_page_buckets[i];
		unsigned int bucket_count = 0;
		vm_page_t m;

		bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
		lck_spin_lock(bucket_lock);

		for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
			bucket_count++;

		lck_spin_unlock(bucket_lock);

		/* don't touch pageable memory while holding locks */
		info[i].hib_count = bucket_count;
	}

	return vm_page_bucket_count;
}
#endif	/* MACH_VM_DEBUG */