/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */
#include <libkern/OSAtomic.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>
#include <ppc/mappings.h>		/* (BRINGUP) */
#include <pexpert/pexpert.h>		/* (BRINGUP) */

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kern_memorystatus.h>

#include <sys/kdebug.h>
104 boolean_t vm_page_free_verify
= TRUE
;
106 uint_t vm_lopage_free_count
= 0;
107 uint_t vm_lopage_free_limit
= 0;
108 uint_t vm_lopage_lowater
= 0;
109 boolean_t vm_lopage_refill
= FALSE
;
110 boolean_t vm_lopage_needed
= FALSE
;
112 lck_mtx_ext_t vm_page_queue_lock_ext
;
113 lck_mtx_ext_t vm_page_queue_free_lock_ext
;
114 lck_mtx_ext_t vm_purgeable_queue_lock_ext
;
116 int speculative_age_index
= 0;
117 int speculative_steal_index
= 0;
118 struct vm_speculative_age_q vm_page_queue_speculative
[VM_PAGE_MAX_SPECULATIVE_AGE_Q
+ 1];
121 __private_extern__
void vm_page_init_lck_grp(void);
123 static void vm_page_free_prepare(vm_page_t page
);
/*
 *	Associated with this page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */

vm_offset_t virtual_space_start;
vm_offset_t virtual_space_end;

/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical table.]
 */
typedef struct {
	vm_page_t	pages;
#if	MACH_PAGE_HASH_STATS
	int		cur_count;		/* current count */
	int		hi_count;		/* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;


#define BUCKETS_PER_LOCK	16

vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
unsigned int	vm_page_hash_mask;		/* Mask for hash function */
unsigned int	vm_page_hash_shift;		/* Shift for hash function */
uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
unsigned int	vm_page_bucket_lock_count = 0;	/* How big is array of locks? */

lck_spin_t	*vm_page_bucket_locks;
#if	MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
	int	i;
	int	numbuckets = 0;
	int	highsum = 0;
	int	maxdepth = 0;

	for (i = 0; i < vm_page_bucket_count; i++) {
		if (vm_page_buckets[i].hi_count) {
			numbuckets++;
			highsum += vm_page_buckets[i].hi_count;
			if (vm_page_buckets[i].hi_count > maxdepth)
				maxdepth = vm_page_buckets[i].hi_count;
		}
	}
	printf("Total number of buckets: %d\n", vm_page_bucket_count);
	printf("Number used buckets:     %d = %d%%\n",
		numbuckets, 100*numbuckets/vm_page_bucket_count);
	printf("Number unused buckets:   %d = %d%%\n",
		vm_page_bucket_count - numbuckets,
		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
	printf("Sum of bucket max depth: %d\n", highsum);
	printf("Average bucket depth:    %d.%2d\n",
		highsum/vm_page_bucket_count,
		highsum%vm_page_bucket_count);
	printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */
/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	code.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;
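
/*
 * Illustrative sketch (not part of the original module): how callers are
 * expected to use PAGE_SIZE/PAGE_MASK/PAGE_SHIFT for page arithmetic.  The
 * helper names below are hypothetical and exist only for this example.
 */
static inline vm_offset_t
example_trunc_to_page(vm_offset_t addr)
{
	/* clear the offset-within-page bits, e.g. 0x1a2f -> 0x1000 for 4K pages */
	return addr & ~((vm_offset_t)PAGE_MASK);
}

static inline vm_offset_t
example_round_to_page(vm_offset_t addr)
{
	/* round up to the next page boundary, e.g. 0x1a2f -> 0x2000 for 4K pages */
	return (addr + PAGE_MASK) & ~((vm_offset_t)PAGE_MASK);
}

static inline unsigned int
example_pages_spanned(vm_offset_t start, vm_size_t size)
{
	/* pages covering [start, start+size), using the shift instead of a divide */
	return (unsigned int)((example_round_to_page(start + size) -
			       example_trunc_to_page(start)) >> PAGE_SHIFT);
}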
/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
struct vm_page	vm_page_template;

vm_page_t	vm_pages = VM_PAGE_NULL;
unsigned int	vm_pages_count = 0;
ppnum_t		vm_page_lowest = 0;
/*
 *	Resident pages that represent real memory
 *	are allocated from a set of free lists,
 *	one per color.
 */
unsigned int	vm_colors;
unsigned int	vm_color_mask;			/* mask is == (vm_colors-1) */
unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
queue_head_t	vm_page_queue_free[MAX_COLORS];
vm_page_t	vm_page_queue_fictitious;
unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_wanted_privileged;
unsigned int	vm_page_free_count;
unsigned int	vm_page_fictitious_count;

unsigned int	vm_page_free_count_minimum;	/* debugging */
/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
zone_t	vm_page_zone;
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
unsigned int io_throttle_zero_fill;

unsigned int	vm_page_local_q_count = 0;
unsigned int	vm_page_local_q_soft_limit = 250;
unsigned int	vm_page_local_q_hard_limit = 500;
struct vplq	*vm_page_local_q = NULL;
/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *	Guard pages are not accessible so they don't
 *	need a physical address, but we need to enter
 *	one in the pmap.
 *	Let's make it recognizable and make sure that
 *	we don't use a real physical page with that
 *	physical address.
 */
ppnum_t vm_page_guard_addr = (ppnum_t) -2;
/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	inactive and zf for convenience as the
 *	pageout daemon often assigns a higher
 *	affinity to zf pages.
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
queue_head_t	vm_page_queue_zf;	/* inactive memory queue for zero fill */
queue_head_t	vm_page_queue_throttled;

unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_throttled_count;
unsigned int	vm_page_speculative_count;
unsigned int	vm_page_wire_count;
unsigned int	vm_page_wire_count_initial;
unsigned int	vm_page_gobble_count = 0;
unsigned int	vm_page_wire_count_warning = 0;
unsigned int	vm_page_gobble_count_warning = 0;

unsigned int	vm_page_purgeable_count = 0;		/* # of pages purgeable now */
unsigned int	vm_page_purgeable_wired_count = 0;	/* # of purgeable pages that are wired now */
uint64_t	vm_page_purged_count = 0;		/* total count of purged pages */
#if DEVELOPMENT || DEBUG
unsigned int	vm_page_speculative_recreated = 0;
unsigned int	vm_page_speculative_created = 0;
unsigned int	vm_page_speculative_used = 0;
#endif

uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t		max_valid_low_ppnum = 0xffffffff;
/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_throttle_limit = 0;
uint32_t	vm_page_creation_throttle = 0;
unsigned int	vm_page_inactive_target = 0;
unsigned int	vm_page_inactive_min = 0;
unsigned int	vm_page_free_reserved = 0;
unsigned int	vm_page_throttle_count = 0;

/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;
/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
	page_mask = page_size - 1;

	if ((page_mask & page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");

	for (page_shift = 0; ; page_shift++)
		if ((1U << page_shift) == page_size)
			break;
}
/* Called once during startup, once the cache geometry is known.
 */
static void
vm_page_set_colors( void )
{
	unsigned int	n, override;

	if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )	/* colors specified as a boot-arg? */
		n = override;
	else if ( vm_cache_geometry_colors )		/* do we know what the cache geometry is? */
		n = vm_cache_geometry_colors;
	else	n = DEFAULT_COLORS;			/* use default if all else fails */

	if ( n > MAX_COLORS )
		n = MAX_COLORS;

	/* the count must be a power of 2  */
	if ( ( n & (n - 1)) != 0  )
		panic("vm_page_set_colors");

	vm_colors = n;
	vm_color_mask = n - 1;
}
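
/*
 * Illustrative sketch (not part of the original module): once vm_colors and
 * vm_color_mask are set, a page's free-list color is derived from its
 * physical page number, so physically adjacent pages land on different free
 * queues and different cache sets.  The helper name is hypothetical and
 * exists only for this example; the same expression is used directly by the
 * release/grab paths below.
 */
static inline unsigned int
example_page_color(ppnum_t phys_page)
{
	/* vm_color_mask == vm_colors - 1, and vm_colors is a power of 2 */
	return (unsigned int)(phys_page & vm_color_mask);
}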
lck_grp_t		vm_page_lck_grp_free;
lck_grp_t		vm_page_lck_grp_queue;
lck_grp_t		vm_page_lck_grp_local;
lck_grp_t		vm_page_lck_grp_purge;
lck_grp_t		vm_page_lck_grp_alloc;
lck_grp_t		vm_page_lck_grp_bucket;
lck_grp_attr_t		vm_page_lck_grp_attr;
lck_attr_t		vm_page_lck_attr;


__private_extern__ void
vm_page_init_lck_grp(void)
{
	/*
	 * initialize the vm_page lock world
	 */
	lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
	lck_attr_setdefault(&vm_page_lck_attr);
}
void
vm_page_init_local_q()
{
	unsigned int	num_cpus;
	unsigned int	i;
	struct vplq	*t_local_q;

	num_cpus = ml_get_max_cpus();

	/*
	 * no point in this for a uni-processor system
	 */
	if (num_cpus >= 2) {
		t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));

		for (i = 0; i < num_cpus; i++) {
			struct vpl	*lq;

			lq = &t_local_q[i].vpl_un.vpl;
			VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
			queue_init(&lq->vpl_queue);
		}
		vm_page_local_q_count = num_cpus;

		vm_page_local_q = (struct vplq *)t_local_q;
	}
}
uint64_t	initial_max_mem;
int		initial_wire_count;
int		initial_free_count;
int		initial_lopage_count;
/*
 *	vm_page_bootstrap:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 *	Returns the range of available kernel virtual memory.
 */

void
vm_page_bootstrap(
	vm_offset_t		*startp,
	vm_offset_t		*endp)
{
	register vm_page_t	m;
	unsigned int		i;
	unsigned int		log1;
	unsigned int		log2;
	unsigned int		size;

	/*
	 *	Initialize the vm_page template.
	 */

	m = &vm_page_template;
	bzero(m, sizeof (*m));

	m->pageq.next = NULL;
	m->pageq.prev = NULL;
	m->listq.next = NULL;
	m->listq.prev = NULL;
	m->next = VM_PAGE_NULL;

	m->object = VM_OBJECT_NULL;		/* reset later */
	m->offset = (vm_object_offset_t) -1;	/* reset later */

	m->pageout_queue = FALSE;
	m->speculative = FALSE;
	m->reference = FALSE;
	m->throttled = FALSE;
	m->__unused_pageq_bits = 0;

	m->phys_page = 0;			/* reset later */

	m->fictitious = FALSE;
	m->clustered = FALSE;
	m->overwriting = FALSE;
	m->encrypted = FALSE;
	m->encrypted_cleaning = FALSE;
	m->list_req_pending = FALSE;
	m->dump_cleaning = FALSE;
	m->cs_validated = FALSE;
	m->cs_tainted = FALSE;
	m->zero_fill = FALSE;

	m->__unused_object_bits = 0;
	/*
	 *	Initialize the page queues.
	 */
	vm_page_init_lck_grp();

	lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
		int group;

		purgeable_queues[i].token_q_head = 0;
		purgeable_queues[i].token_q_tail = 0;
		for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
			queue_init(&purgeable_queues[i].objq[group]);

		purgeable_queues[i].type = i;
		purgeable_queues[i].new_pages = 0;
		purgeable_queues[i].debug_count_tokens = 0;
		purgeable_queues[i].debug_count_objects = 0;
	}

	for (i = 0; i < MAX_COLORS; i++ )
		queue_init(&vm_page_queue_free[i]);

	queue_init(&vm_lopage_queue_free);
	vm_page_queue_fictitious = VM_PAGE_NULL;
	queue_init(&vm_page_queue_active);
	queue_init(&vm_page_queue_inactive);
	queue_init(&vm_page_queue_throttled);
	queue_init(&vm_page_queue_zf);

	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
		queue_init(&vm_page_queue_speculative[i].age_q);

		vm_page_queue_speculative[i].age_ts.tv_sec = 0;
		vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
	}
	vm_page_free_wanted = 0;
	vm_page_free_wanted_privileged = 0;

	vm_page_set_colors();
	/*
	 *	Steal memory for the map and zone subsystems.
	 */

	vm_map_steal_memory();
	zone_steal_memory();

	/*
	 *	Allocate (and initialize) the virtual-to-physical
	 *	table hash buckets.
	 *
	 *	The number of buckets should be a power of two to
	 *	get a good hash function.  The following computation
	 *	chooses the first power of two that is greater
	 *	than the number of physical pages in the system.
	 */

	if (vm_page_bucket_count == 0) {
		unsigned int npages = pmap_free_pages();

		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < npages)
			vm_page_bucket_count <<= 1;
	}
	vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;

	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 *	Calculate object shift value for hashing algorithm:
	 *		O = log2(sizeof(struct vm_object))
	 *		B = log2(vm_page_bucket_count)
	 *	hash shifts the object left by
	 *		B/2 - O
	 */
	size = vm_page_bucket_count;
	for (log1 = 0; size > 1; log1++)
		size /= 2;
	size = sizeof(struct vm_object);
	for (log2 = 0; size > 1; log2++)
		size /= 2;
	vm_page_hash_shift = log1/2 - log2 + 1;

	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
	vm_page_bucket_hash |= 1;			/* Set bit and add 1 - always must be 1 to ensure unique series */

	if (vm_page_hash_mask & vm_page_bucket_count)
		printf("vm_page_bootstrap: WARNING -- strange page hash\n");
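
	/*
	 * Worked example (illustrative, not from the original source): on a
	 * machine with 1,000,000 physical pages, vm_page_bucket_count becomes
	 * 2^20 (the first power of two above npages), so log1 = 20.  With a
	 * hypothetical 224-byte struct vm_object, log2 = 7, giving
	 *
	 *	vm_page_hash_shift  = 20/2 - 7 + 1 = 4
	 *	vm_page_bucket_hash = (1 << 10) | (1 << 5) | 1 = 0x421
	 *
	 * and vm_page_hash_mask = 0xFFFFF.  Because the bucket count is a
	 * power of two, (vm_page_hash_mask & vm_page_bucket_count) == 0 and
	 * the sanity check above stays quiet.
	 */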
	vm_page_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));

	vm_page_bucket_locks = (lck_spin_t *)
		pmap_steal_memory(vm_page_bucket_lock_count *
				  sizeof(lck_spin_t));

	for (i = 0; i < vm_page_bucket_count; i++) {
		register vm_page_bucket_t *bucket = &vm_page_buckets[i];

		bucket->pages = VM_PAGE_NULL;
#if	MACH_PAGE_HASH_STATS
		bucket->cur_count = 0;
		bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
	}

	for (i = 0; i < vm_page_bucket_lock_count; i++)
		lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
	/*
	 *	Machine-dependent code allocates the resident page table.
	 *	It uses vm_page_init to initialize the page frames.
	 *	The code also returns to us the virtual space available
	 *	to the kernel.  We don't trust the pmap module
	 *	to get the alignment right.
	 */

	pmap_startup(&virtual_space_start, &virtual_space_end);
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);

	*startp = virtual_space_start;
	*endp = virtual_space_end;

	/*
	 *	Compute the initial "wire" count.
	 *	Up until now, the pages which have been set aside are not under
	 *	the VM system's control, so although they aren't explicitly
	 *	wired, they nonetheless can't be moved. At this moment,
	 *	all VM managed pages are "free", courtesy of pmap_startup.
	 */
	assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
	vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;	/* initial value */
	vm_page_wire_count_initial = vm_page_wire_count;
	vm_page_free_count_minimum = vm_page_free_count;

	initial_max_mem = max_mem;
	initial_wire_count = vm_page_wire_count;
	initial_free_count = vm_page_free_count;
	initial_lopage_count = vm_lopage_free_count;

	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
	       vm_page_free_count, vm_page_wire_count);

	simple_lock_init(&vm_paging_lock, 0);
}
#ifndef	MACHINE_PAGES
/*
 *	We implement pmap_steal_memory and pmap_startup with the help
 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
	vm_size_t size)
{
	vm_offset_t	addr, vaddr;
	ppnum_t		phys_page;

	/*
	 *	We round the size to a round multiple.
	 */

	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);

	/*
	 *	If this is the first call to pmap_steal_memory,
	 *	we have to initialize ourself.
	 */

	if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/*
		 *	The initial values must be aligned properly, and
		 *	we don't trust the pmap module to do it right.
		 */

		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);
	}

	/*
	 *	Allocate virtual memory for this request.
	 */

	addr = virtual_space_start;
	virtual_space_start += size;

	kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */

	/*
	 *	Allocate and map physical pages to back new virtual pages.
	 */

	for (vaddr = round_page(addr);
	     vaddr < addr + size;
	     vaddr += PAGE_SIZE) {

		if (!pmap_next_page_hi(&phys_page))
			panic("pmap_steal_memory");

		/*
		 *	XXX Logically, these mappings should be wired,
		 *	but some pmap modules barf if they are.
		 */
#if defined(__LP64__)
		pmap_pre_expand(kernel_pmap, vaddr);
#endif

		pmap_enter(kernel_pmap, vaddr, phys_page,
			   VM_PROT_READ|VM_PROT_WRITE,
			   VM_WIMG_USE_DEFAULT, FALSE);
		/*
		 * Account for newly stolen memory
		 */
		vm_page_wire_count++;
	}

	return (void *) addr;
}
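
/*
 * Illustrative usage sketch (not part of the original module): early
 * bootstrap code can carve permanent, page-backed storage out of the kernel
 * virtual space with pmap_steal_memory() before zalloc/kalloc are available,
 * as the hash-bucket allocation above does.  Memory stolen this way is never
 * returned and is accounted as wired.  The table type, name and size below
 * are hypothetical and exist only for this example.
 */
static inline uint32_t *
example_steal_boot_table(unsigned int nentries)
{
	uint32_t *table;

	/* grab nentries * 4 bytes of zero-filled, permanently mapped memory */
	table = (uint32_t *) pmap_steal_memory(nentries * sizeof(uint32_t));
	bzero(table, nentries * sizeof(uint32_t));
	return table;
}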
void
pmap_startup(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	unsigned int	i, npages, pages_initialized, fill, fillval;
	ppnum_t		phys_page;
	addr64_t	tmpaddr;

	/*
	 *	We calculate how many page frames we will have
	 *	and then allocate the page structures in one chunk.
	 */

	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
	tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);

	/*
	 *	Initialize the page frames.
	 */
	for (i = 0, pages_initialized = 0; i < npages; i++) {
		if (!pmap_next_page(&phys_page))
			break;
		if (pages_initialized == 0 || phys_page < vm_page_lowest)
			vm_page_lowest = phys_page;

		vm_page_init(&vm_pages[i], phys_page, FALSE);
		vm_page_pages++;
		pages_initialized++;
	}
	vm_pages_count = pages_initialized;

	/*
	 * Check if we want to initialize pages to a known value
	 */
	fill = 0;						/* Assume no fill */
	if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;	/* Set fill */

	// -debug code remove
	if (2 == vm_himemory_mode) {
		// free low -> high so high is preferred
		for (i = 1; i <= pages_initialized; i++) {
			if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
			vm_page_release(&vm_pages[i - 1]);
		}
	}
	else
	// debug code remove-

	/*
	 * Release pages in reverse order so that physical pages
	 * initially get allocated in ascending addresses. This keeps
	 * the devices (which must address physical memory) happy if
	 * they require several consecutive pages.
	 */
	for (i = pages_initialized; i > 0; i--) {
		if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
		vm_page_release(&vm_pages[i - 1]);
	}
	{
	vm_page_t	xx, xxo, xxl;
	unsigned int	j, k, l;

	j = 0;						/* (BRINGUP) */
	for( i = 0; i < vm_colors; i++ ) {
		queue_iterate(&vm_page_queue_free[i],
			      xx,
			      vm_page_t,
			      pageq) {			/* BRINGUP */
			j++;				/* (BRINGUP) */
			if(j > vm_page_free_count) {	/* (BRINGUP) */
				panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
			}

			l = vm_page_free_count - j;	/* (BRINGUP) */
			k = 0;				/* (BRINGUP) */

			if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

			for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
				k++;
				if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
				if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
					panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
				}
			}

			xxl = xx;
		}
	}

	if(j != vm_page_free_count) {			/* (BRINGUP) */
		panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
	}
	}
	// debug code remove-

	/*
	 *	We have to re-align virtual_space_start,
	 *	because pmap_steal_memory has been using it.
	 */

	virtual_space_start = round_page(virtual_space_start);

	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
#endif	/* MACHINE_PAGES */
/*
 *	Routine:	vm_page_module_init
 *	Purpose:
 *		Second initialization pass, to be done after
 *		the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
			     0, PAGE_SIZE, "vm pages");

#if	ZONE_DEBUG
	zone_debug_disable(vm_page_zone);
#endif	/* ZONE_DEBUG */

	zone_change(vm_page_zone, Z_EXPAND, FALSE);
	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
	zone_change(vm_page_zone, Z_FOREIGN, TRUE);

	/*
	 * Adjust zone statistics to account for the real pages allocated
	 * in vm_page_create(). [Q: is this really what we want?]
	 */
	vm_page_zone->count += vm_page_pages;
	vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;

	lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
}
/*
 *	Routine:	vm_page_create
 *	Purpose:
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory.  For example,
 *		memory that it was reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
 */
void
vm_page_create(
	ppnum_t start,
	ppnum_t end)
{
	ppnum_t		phys_page;
	vm_page_t	m;

	for (phys_page = start;
	     phys_page < end;
	     phys_page++) {
		while ((m = (vm_page_t) vm_page_grab_fictitious())
			== VM_PAGE_NULL)
			vm_page_more_fictitious();

		vm_page_init(m, phys_page, FALSE);
		pmap_clear_noencrypt(phys_page);
		vm_page_pages++;
		vm_page_release(m);
	}
}
/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:	The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	 & vm_page_hash_mask)
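
/*
 * Illustrative sketch (not part of the original module): how an (object,
 * offset) pair is turned into a hash bucket and the spin lock that guards it.
 * The hash chains are split across vm_page_bucket_lock_count spin locks, one
 * lock per BUCKETS_PER_LOCK consecutive buckets.  The helper name is
 * hypothetical and exists only for this example; insert/remove/lookup below
 * use the same two expressions inline.
 */
static inline lck_spin_t *
example_bucket_lock_for(vm_object_t object, vm_object_offset_t offset,
			vm_page_bucket_t **bucketp)
{
	int	hash_id;

	hash_id = vm_page_hash(object, offset);
	*bucketp = &vm_page_buckets[hash_id];

	/* BUCKETS_PER_LOCK (16) adjacent buckets share one spin lock */
	return &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
}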
/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object must be locked.
 */
void
vm_page_insert(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
}
void
vm_page_insert_internal(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset,
	boolean_t		queues_lock_held,
	boolean_t		insert_in_hash)
{
	vm_page_bucket_t *bucket;
	lck_spin_t	*bucket_lock;
	int		hash_id;

	XPR(XPR_VM_PAGE,
	        "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
		object, offset, mem, 0,0);

	if (object == vm_submap_object) {
		/* the vm_submap_object is only a placeholder for submaps */
		panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
	}

	vm_object_lock_assert_exclusive(object);
	lck_mtx_assert(&vm_page_queue_lock,
		       queues_lock_held ? LCK_MTX_ASSERT_OWNED
		       			: LCK_MTX_ASSERT_NOTOWNED);

	if (insert_in_hash == TRUE) {
		if (mem->tabled || mem->object != VM_OBJECT_NULL)
			panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
			      "already in (obj=%p,off=0x%llx)",
			      mem, object, offset, mem->object, mem->offset);

		assert(!object->internal || offset < object->size);

		/* only insert "pageout" pages into "pageout" objects,
		 * and normal pages into normal objects */
		assert(object->pageout == mem->pageout);

		assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);

		/*
		 *	Record the object/offset pair in this page
		 */

		mem->object = object;
		mem->offset = offset;

		/*
		 *	Insert it into the object_object/offset hash table
		 */
		hash_id = vm_page_hash(object, offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		mem->next = bucket->pages;
		bucket->pages = mem;
#if	MACH_PAGE_HASH_STATS
		if (++bucket->cur_count > bucket->hi_count)
			bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */

		lck_spin_unlock(bucket_lock);
	}
	/*
	 *	Now link into the object's list of backed pages.
	 */

	VM_PAGE_INSERT(mem, object);
	mem->tabled = TRUE;

	/*
	 *	Show that the object has one more resident page.
	 */

	object->resident_page_count++;
	if (VM_PAGE_WIRED(mem)) {
		object->wired_page_count++;
	}
	assert(object->resident_page_count >= object->wired_page_count);

	assert(!mem->reusable);

	if (object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			OSAddAtomic(1, &vm_page_purgeable_wired_count);
		} else {
			OSAddAtomic(1, &vm_page_purgeable_count);
		}
	} else if (object->purgable == VM_PURGABLE_EMPTY &&
		   mem->throttled) {
		/*
		 * This page belongs to a purged VM object but hasn't
		 * been purged (because it was "busy").
		 * It's in the "throttled" queue and hence not
		 * visible to vm_pageout_scan().  Move it to a pageable
		 * queue, so that it can eventually be reclaimed, instead
		 * of lingering in the "empty" object.
		 */
		if (queues_lock_held == FALSE)
			vm_page_lockspin_queues();
		vm_page_deactivate(mem);
		if (queues_lock_held == FALSE)
			vm_page_unlock_queues();
	}
}
/*
 *	vm_page_replace:
 *
 *	Exactly like vm_page_insert, except that we first
 *	remove any existing page at the given offset in object.
 *
 *	The object must be locked.
 */
void
vm_page_replace(
	register vm_page_t		mem,
	register vm_object_t		object,
	register vm_object_offset_t	offset)
{
	vm_page_bucket_t *bucket;
	vm_page_t	 found_m = VM_PAGE_NULL;
	lck_spin_t	*bucket_lock;
	int		hash_id;

	vm_object_lock_assert_exclusive(object);

	if (mem->tabled || mem->object != VM_OBJECT_NULL)
		panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
		      "already in (obj=%p,off=0x%llx)",
		      mem, object, offset, mem->object, mem->offset);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);

	/*
	 *	Record the object/offset pair in this page
	 */

	mem->object = object;
	mem->offset = offset;

	/*
	 *	Insert it into the object_object/offset hash table,
	 *	replacing any page that might have been there.
	 */

	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	if (bucket->pages) {
		vm_page_t	*mp = &bucket->pages;
		vm_page_t	m = *mp;

		do {
			if (m->object == object && m->offset == offset) {
				/*
				 * Remove old page from hash list
				 */
				*mp = m->next;

				found_m = m;
				break;
			}
			mp = &m->next;
		} while ((m = *mp));

		mem->next = bucket->pages;
	} else {
		mem->next = VM_PAGE_NULL;
	}
	/*
	 * insert new page at head of hash list
	 */
	bucket->pages = mem;

	lck_spin_unlock(bucket_lock);

	if (found_m) {
		/*
		 * there was already a page at the specified
		 * offset for this object... remove it from
		 * the object and free it back to the free list
		 */
		vm_page_free_unlocked(found_m, FALSE);
	}
	vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
}
/*
 *	vm_page_remove:		[ internal use only ]
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object must be locked.
 */
void
vm_page_remove(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_bucket_t *bucket;
	vm_page_t	this;
	lck_spin_t	*bucket_lock;
	int		hash_id;

	XPR(XPR_VM_PAGE,
		"vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
		mem->object, mem->offset,
		mem, 0,0);

	vm_object_lock_assert_exclusive(mem->object);
	assert(mem->tabled);
	assert(!mem->cleaning);

	if (remove_from_hash == TRUE) {
		/*
		 *	Remove from the object_object/offset hash table
		 */
		hash_id = vm_page_hash(mem->object, mem->offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		if ((this = bucket->pages) == mem) {
			/* optimize for common case */

			bucket->pages = mem->next;
		} else {
			vm_page_t	*prev;

			for (prev = &this->next;
			     (this = *prev) != mem;
			     prev = &this->next)
				continue;
			*prev = this->next;
		}
#if	MACH_PAGE_HASH_STATS
		bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */

		lck_spin_unlock(bucket_lock);
	}
	/*
	 *	Now remove from the object's list of backed pages.
	 */

	VM_PAGE_REMOVE(mem);

	/*
	 *	And show that the object has one fewer resident
	 *	page.
	 */

	assert(mem->object->resident_page_count > 0);
	mem->object->resident_page_count--;
	if (VM_PAGE_WIRED(mem)) {
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
	}
	assert(mem->object->resident_page_count >=
	       mem->object->wired_page_count);
	if (mem->reusable) {
		assert(mem->object->reusable_page_count > 0);
		mem->object->reusable_page_count--;
		assert(mem->object->reusable_page_count <=
		       mem->object->resident_page_count);
		mem->reusable = FALSE;
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	} else if (mem->object->all_reusable) {
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	}

	if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		} else {
			assert(vm_page_purgeable_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_count);
		}
	}
	mem->tabled = FALSE;
	mem->object = VM_OBJECT_NULL;
	mem->offset = (vm_object_offset_t) -1;
}
/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

unsigned long vm_page_lookup_hint = 0;
unsigned long vm_page_lookup_hint_next = 0;
unsigned long vm_page_lookup_hint_prev = 0;
unsigned long vm_page_lookup_hint_miss = 0;
unsigned long vm_page_lookup_bucket_NULL = 0;
unsigned long vm_page_lookup_miss = 0;
vm_page_t
vm_page_lookup(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	mem;
	vm_page_bucket_t *bucket;
	queue_entry_t	qe;
	lck_spin_t	*bucket_lock;
	int		hash_id;

	vm_object_lock_assert_held(object);
	mem = object->memq_hint;

	if (mem != VM_PAGE_NULL) {
		assert(mem->object == object);

		if (mem->offset == offset) {
			vm_page_lookup_hint++;
			return mem;
		}
		qe = queue_next(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t	next_page;

			next_page = (vm_page_t) qe;
			assert(next_page->object == object);

			if (next_page->offset == offset) {
				vm_page_lookup_hint_next++;
				object->memq_hint = next_page; /* new hint */
				return next_page;
			}
		}
		qe = queue_prev(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t prev_page;

			prev_page = (vm_page_t) qe;
			assert(prev_page->object == object);

			if (prev_page->offset == offset) {
				vm_page_lookup_hint_prev++;
				object->memq_hint = prev_page; /* new hint */
				return prev_page;
			}
		}
	}
	/*
	 * Search the hash table for this object/offset pair
	 */
	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];

	/*
	 * since we hold the object lock, we are guaranteed that no
	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
	 */
	if (bucket->pages == VM_PAGE_NULL) {
		vm_page_lookup_bucket_NULL++;

		return (VM_PAGE_NULL);
	}
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
		if ((mem->object == object) && (mem->offset == offset))
			break;
	}
	lck_spin_unlock(bucket_lock);

	if (mem != VM_PAGE_NULL) {
		if (object->memq_hint != VM_PAGE_NULL) {
			vm_page_lookup_hint_miss++;
		}
		assert(mem->object == object);
		object->memq_hint = mem;
	} else {
		vm_page_lookup_miss++;
	}

	return mem;
}
/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(
	register vm_page_t		mem,
	register vm_object_t		new_object,
	vm_object_offset_t		new_offset,
	boolean_t			encrypted_ok)
{
	assert(mem->object != new_object);

	/*
	 * The encryption key is based on the page's memory object
	 * (aka "pager") and paging offset.  Moving the page to
	 * another VM object changes its "pager" and "paging_offset"
	 * so it has to be decrypted first, or we would lose the key.
	 *
	 * One exception is VM object collapsing, where we transfer pages
	 * from one backing object to its parent object.  This operation also
	 * transfers the paging information, so the <pager,paging_offset> info
	 * should remain consistent.  The caller (vm_object_do_collapse())
	 * sets "encrypted_ok" in this case.
	 */
	if (!encrypted_ok && mem->encrypted) {
		panic("vm_page_rename: page %p is encrypted\n", mem);
	}

	XPR(XPR_VM_PAGE,
		"vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
		new_object, new_offset,
		mem, 0,0);

	/*
	 *	Changes to mem->object require the page lock because
	 *	the pageout daemon uses that lock to get the object.
	 */
	vm_page_lockspin_queues();

	vm_page_remove(mem, TRUE);
	vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);

	vm_page_unlock_queues();
}
/*
 *	vm_page_init:
 *
 *	Initialize the fields in a new page.
 *	This takes a structure with random values and initializes it
 *	so that it can be given to vm_page_release or vm_page_insert.
 */
void
vm_page_init(
	vm_page_t	mem,
	ppnum_t		phys_page,
	boolean_t	lopage)
{
	*mem = vm_page_template;
	mem->phys_page = phys_page;
	mem->lopage = lopage;
}
/*
 *	vm_page_grab_fictitious:
 *
 *	Remove a fictitious page from the free list.
 *	Returns VM_PAGE_NULL if there are no free pages.
 */
int	c_vm_page_grab_fictitious = 0;
int	c_vm_page_release_fictitious = 0;
int	c_vm_page_more_fictitious = 0;

extern vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);

vm_page_t
vm_page_grab_fictitious_common(
	ppnum_t phys_addr)
{
	register vm_page_t m;

	m = (vm_page_t)zget(vm_page_zone);
	if (m) {
		vm_page_init(m, phys_addr, FALSE);
		m->fictitious = TRUE;
	}

	c_vm_page_grab_fictitious++;
	return m;
}

vm_page_t
vm_page_grab_fictitious(void)
{
	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
}
vm_page_t
vm_page_grab_guard(void)
{
	return vm_page_grab_fictitious_common(vm_page_guard_addr);
}

/*
 *	vm_page_release_fictitious:
 *
 *	Release a fictitious page to the free list.
 */
void
vm_page_release_fictitious(
	register vm_page_t m)
{
	assert(m->fictitious);
	assert(m->phys_page == vm_page_fictitious_addr ||
	       m->phys_page == vm_page_guard_addr);

	c_vm_page_release_fictitious++;

	if (m->free)
		panic("vm_page_release_fictitious");

	zfree(vm_page_zone, m);
}
/*
 *	vm_page_more_fictitious:
 *
 *	Add more fictitious pages to the free list.
 *	Allowed to block. This routine is way intimate
 *	with the zones code, for several reasons:
 *	1. we need to carve some page structures out of physical
 *	   memory before zones work, so they _cannot_ come from
 *	   the zone_map.
 *	2. the zone needs to be collectable in order to prevent
 *	   growth without bound. These structures are used by
 *	   the device pager (by the hundreds and thousands), as
 *	   private pages for pageout, and as blocking pages for
 *	   pagein. Temporary bursts in demand should not result in
 *	   permanent allocation of a resource.
 *	3. To smooth allocation humps, we allocate single pages
 *	   with kernel_memory_allocate(), and cram them into the
 *	   zone. This also allows us to initialize the vm_page_t's
 *	   on the way into the zone, so that zget() always returns
 *	   an initialized structure. The zone free element pointer
 *	   and the free page pointer are both the first item in the
 *	   vm_page_t.
 *	4. By having the pages in the zone pre-initialized, we need
 *	   not keep 2 levels of lists. The garbage collector simply
 *	   scans our list, and reduces physical memory usage as it
 *	   sees fit.
 */

void vm_page_more_fictitious(void)
{
	register vm_page_t m;
	vm_offset_t	addr;
	kern_return_t	retval;
	int		i;

	c_vm_page_more_fictitious++;

	/*
	 * Allocate a single page from the zone_map. Do not wait if no physical
	 * pages are immediately available, and do not zero the space. We need
	 * our own blocking lock here to prevent having multiple,
	 * simultaneous requests from piling up on the zone_map lock. Exactly
	 * one (of our) threads should be potentially waiting on the map lock.
	 * If winner is not vm-privileged, then the page allocation will fail,
	 * and it will temporarily block here in the vm_page_wait().
	 */
	lck_mtx_lock(&vm_page_alloc_lock);
	/*
	 * If another thread allocated space, just bail out now.
	 */
	if (zone_free_count(vm_page_zone) > 5) {
		/*
		 * The number "5" is a small number that is larger than the
		 * number of fictitious pages that any single caller will
		 * attempt to allocate. Otherwise, a thread will attempt to
		 * acquire a fictitious page (vm_page_grab_fictitious), fail,
		 * release all of the resources and locks already acquired,
		 * and then call this routine. This routine finds the pages
		 * that the caller released, so fails to allocate new space.
		 * The process repeats infinitely. The largest known number
		 * of fictitious pages required in this manner is 2. 5 is
		 * simply a somewhat larger number.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		return;
	}

	retval = kernel_memory_allocate(zone_map,
					&addr, PAGE_SIZE, VM_PROT_ALL,
					KMA_KOBJECT|KMA_NOPAGEWAIT);
	if (retval != KERN_SUCCESS) {
		/*
		 * No page was available. Tell the pageout daemon, drop the
		 * lock to give another thread a chance at it, and
		 * wait for the pageout daemon to make progress.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		vm_page_wait(THREAD_UNINT);
		return;
	}
	/*
	 * Initialize as many vm_page_t's as will fit on this page. This
	 * depends on the zone code disturbing ONLY the first item of
	 * each zone element.
	 */
	m = (vm_page_t)addr;
	for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
		vm_page_init(m, vm_page_fictitious_addr, FALSE);
		m->fictitious = TRUE;
		m++;
	}
	zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
	lck_mtx_unlock(&vm_page_alloc_lock);
}
/*
 * Return true if it is not likely that a non-vm_privileged thread
 * can get memory without blocking.  Advisory only, since the
 * situation may change under us.
 */
int
vm_pool_low(void)
{
	/* No locking, at worst we will fib. */
	return( vm_page_free_count <= vm_page_free_reserved );
}
/*
 * this is an interface to support bring-up of drivers
 * on platforms with physical memory > 4G...
 */
int		vm_himemory_mode = 0;


/*
 * this interface exists to support hardware controllers
 * incapable of generating DMAs with more than 32 bits
 * of address on platforms with physical memory > 4G...
 */
unsigned int	vm_lopages_allocated_q = 0;
unsigned int	vm_lopages_allocated_cpm_success = 0;
unsigned int	vm_lopages_allocated_cpm_failed = 0;
queue_head_t	vm_lopage_queue_free;
vm_page_t
vm_page_grablo(void)
{
	vm_page_t	mem;

	if (vm_lopage_needed == FALSE)
		return (vm_page_grab());

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if ( !queue_empty(&vm_lopage_queue_free)) {
		queue_remove_first(&vm_lopage_queue_free,
				   mem,
				   vm_page_t,
				   pageq);
		assert(vm_lopage_free_count);

		vm_lopage_free_count--;
		vm_lopages_allocated_q++;

		if (vm_lopage_free_count < vm_lopage_lowater)
			vm_lopage_refill = TRUE;

		lck_mtx_unlock(&vm_page_queue_free_lock);
	} else {
		lck_mtx_unlock(&vm_page_queue_free_lock);

		if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {

			lck_mtx_lock_spin(&vm_page_queue_free_lock);
			vm_lopages_allocated_cpm_failed++;
			lck_mtx_unlock(&vm_page_queue_free_lock);

			return (VM_PAGE_NULL);
		}
		vm_page_lockspin_queues();

		mem->gobbled = FALSE;
		vm_page_gobble_count--;
		vm_page_wire_count--;

		vm_lopages_allocated_cpm_success++;
		vm_page_unlock_queues();
	}
	assert(!mem->gobbled);
	assert(!mem->pmapped);
	assert(!mem->wpmapped);

	mem->pageq.next = NULL;
	mem->pageq.prev = NULL;

	return (mem);
}
/*
 *	first try to grab a page from the per-cpu free list...
 *	this must be done while pre-emption is disabled... if
 *	a page is available, we're done...
 *	if no page is available, grab the vm_page_queue_free_lock
 *	and see if current number of free pages would allow us
 *	to grab at least 1... if not, return VM_PAGE_NULL as before...
 *	if there are pages available, disable preemption and
 *	recheck the state of the per-cpu free list... we could
 *	have been preempted and moved to a different cpu, or
 *	some other thread could have re-filled it... if still
 *	empty, figure out how many pages we can steal from the
 *	global free queue and move to the per-cpu queue...
 *	return 1 of these pages when done... only wakeup the
 *	pageout_scan thread if we moved pages from the global
 *	list... no need for the wakeup if we've satisfied the
 *	request from the per-cpu queue.
 */

#define COLOR_GROUPS_TO_STEAL	4
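
/*
 * Worked example (illustrative, not from the original source): with
 * vm_colors == 32, a per-cpu refill tries to move
 * COLOR_GROUPS_TO_STEAL * vm_colors == 4 * 32 == 128 pages from the global
 * free queues onto the local list, clamped so the global free count never
 * drops below vm_page_free_reserved.  With vm_page_free_count == 200 and
 * vm_page_free_reserved == 100, only 100 pages would actually be stolen.
 */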
vm_page_t
vm_page_grab( void )
{
	vm_page_t	mem;


	disable_preemption();

	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
return_page_from_cpu_list:
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
		mem->pageq.next = NULL;

		enable_preemption();

		assert(mem->listq.next == NULL && mem->listq.prev == NULL);
		assert(mem->tabled == FALSE);
		assert(mem->object == VM_OBJECT_NULL);
		assert(!mem->laundry);
		assert(pmap_verify_free(mem->phys_page));
		assert(!mem->encrypted);
		assert(!mem->pmapped);
		assert(!mem->wpmapped);

		return mem;
	}
	enable_preemption();
	/*
	 *	Optionally produce warnings if the wire or gobble
	 *	counts exceed some threshold.
	 */
	if (vm_page_wire_count_warning > 0
	    && vm_page_wire_count >= vm_page_wire_count_warning) {
		printf("mk: vm_page_grab(): high wired page count of %d\n",
			vm_page_wire_count);
		assert(vm_page_wire_count < vm_page_wire_count_warning);
	}
	if (vm_page_gobble_count_warning > 0
	    && vm_page_gobble_count >= vm_page_gobble_count_warning) {
		printf("mk: vm_page_grab(): high gobbled page count of %d\n",
			vm_page_gobble_count);
		assert(vm_page_gobble_count < vm_page_gobble_count_warning);
	}
	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	/*
	 *	Only let privileged threads (involved in pageout)
	 *	dip into the reserved pool.
	 */
	if ((vm_page_free_count < vm_page_free_reserved) &&
	    !(current_thread()->options & TH_OPT_VMPRIV)) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		mem = VM_PAGE_NULL;
	}
	else {
		vm_page_t	head;
		vm_page_t	tail;
		unsigned int	pages_to_steal;
		int		color;

		while ( vm_page_free_count == 0 ) {

			lck_mtx_unlock(&vm_page_queue_free_lock);
			/*
			 * must be a privileged thread to be
			 * in this state since a non-privileged
			 * thread would have bailed if we were
			 * under the vm_page_free_reserved mark
			 */
			VM_PAGE_WAIT();
			lck_mtx_lock_spin(&vm_page_queue_free_lock);
		}

		disable_preemption();

		if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
			lck_mtx_unlock(&vm_page_queue_free_lock);

			/*
			 * we got preempted and moved to another processor
			 * or we got preempted and someone else ran and filled the cache
			 */
			goto return_page_from_cpu_list;
		}
		if (vm_page_free_count <= vm_page_free_reserved)
			pages_to_steal = 1;
		else {
			pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;

			if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
				pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
		}
		color = PROCESSOR_DATA(current_processor(), start_color);
		head = tail = NULL;

		while (pages_to_steal--) {
			if (--vm_page_free_count < vm_page_free_count_minimum)
				vm_page_free_count_minimum = vm_page_free_count;

			while (queue_empty(&vm_page_queue_free[color]))
				color = (color + 1) & vm_color_mask;

			queue_remove_first(&vm_page_queue_free[color],
					   mem,
					   vm_page_t,
					   pageq);
			mem->pageq.next = NULL;
			mem->pageq.prev = NULL;

			color = (color + 1) & vm_color_mask;

			if (head == NULL)
				head = mem;
			else
				tail->pageq.next = (queue_t)mem;
			tail = mem;

			mem->pageq.prev = NULL;
			assert(mem->listq.next == NULL && mem->listq.prev == NULL);
			assert(mem->tabled == FALSE);
			assert(mem->object == VM_OBJECT_NULL);
			assert(!mem->laundry);

			assert(pmap_verify_free(mem->phys_page));

			assert(!mem->encrypted);
			assert(!mem->pmapped);
			assert(!mem->wpmapped);
		}
		PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
		PROCESSOR_DATA(current_processor(), start_color) = color;

		/*
		 * satisfy this request
		 */
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		mem = head;
		mem->pageq.next = NULL;

		lck_mtx_unlock(&vm_page_queue_free_lock);
	}
	enable_preemption();
	/*
	 * Decide if we should poke the pageout daemon.
	 * We do this if the free count is less than the low
	 * water mark, or if the free count is less than the high
	 * water mark (but above the low water mark) and the inactive
	 * count is less than its target.
	 *
	 * We don't have the counts locked ... if they change a little,
	 * it doesn't really matter.
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	{
	int	percent_avail;

	/*
	 * Decide if we need to poke the memorystatus notification thread.
	 */
	percent_avail =
		(vm_page_active_count + vm_page_inactive_count +
		 vm_page_speculative_count + vm_page_free_count +
		 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
		atop_64(max_mem);

	if (percent_avail <= (kern_memorystatus_level - 5)) {
		kern_memorystatus_level = percent_avail;
		thread_wakeup((event_t)&kern_memorystatus_wakeup);
	}
	}

//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);	/* (TEST/DEBUG) */

	return mem;
}
/*
 *	vm_page_release:
 *
 *	Return a page to the free list.
 */
void
vm_page_release(
	register vm_page_t	mem)
{
	unsigned int	color;
	int		need_wakeup = 0;
	int		need_priv_wakeup = 0;

	unsigned int	pindex;
	phys_entry	*physent;

	physent = mapping_phys_lookup(mem->phys_page, &pindex);	/* (BRINGUP) */
	if(physent->ppLink & ppN) {					/* (BRINGUP) */
		panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
	}
	physent->ppLink = physent->ppLink | ppN;			/* (BRINGUP) */

	assert(!mem->private && !mem->fictitious);
	if (vm_page_free_verify) {
		assert(pmap_verify_free(mem->phys_page));
	}
//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */
	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (mem->free)
		panic("vm_page_release");

	assert(!mem->laundry);
	assert(mem->object == VM_OBJECT_NULL);
	assert(mem->pageq.next == NULL &&
	       mem->pageq.prev == NULL);
	assert(mem->listq.next == NULL &&
	       mem->listq.prev == NULL);

	if ((mem->lopage || vm_lopage_refill == TRUE) &&
	    vm_lopage_free_count < vm_lopage_free_limit &&
	    mem->phys_page < max_valid_low_ppnum) {
		/*
		 * this exists to support hardware controllers
		 * incapable of generating DMAs with more than 32 bits
		 * of address on platforms with physical memory > 4G...
		 */
		queue_enter_first(&vm_lopage_queue_free,
				  mem,
				  vm_page_t,
				  pageq);
		vm_lopage_free_count++;

		if (vm_lopage_free_count >= vm_lopage_free_limit)
			vm_lopage_refill = FALSE;
	} else {
		mem->lopage = FALSE;
		mem->free = TRUE;

		color = mem->phys_page & vm_color_mask;
		queue_enter_first(&vm_page_queue_free[color],
				  mem,
				  vm_page_t,
				  pageq);
		vm_page_free_count++;
		/*
		 *	Check if we should wake up someone waiting for page.
		 *	But don't bother waking them unless they can allocate.
		 *
		 *	We wakeup only one thread, to prevent starvation.
		 *	Because the scheduling system handles wait queues FIFO,
		 *	if we wakeup all waiting threads, one greedy thread
		 *	can starve multiple niceguy threads.  When the threads
		 *	all wakeup, the greedy threads runs first, grabs the page,
		 *	and waits for another page.  It will be the first to run
		 *	when the next page is freed.
		 *
		 *	However, there is a slight danger here.
		 *	The thread we wake might not use the free page.
		 *	Then the other threads could wait indefinitely
		 *	while the page goes unused.  To forestall this,
		 *	the pageout daemon will keep making free pages
		 *	as long as vm_page_free_wanted is non-zero.
		 */

		assert(vm_page_free_count > 0);
		if (vm_page_free_wanted_privileged > 0) {
			vm_page_free_wanted_privileged--;
			need_priv_wakeup = 1;
		} else if (vm_page_free_wanted > 0 &&
			   vm_page_free_count > vm_page_free_reserved) {
			vm_page_free_wanted--;
			need_wakeup = 1;
		}
	}
	lck_mtx_unlock(&vm_page_queue_free_lock);

	if (need_priv_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
	else if (need_wakeup)
		thread_wakeup_one((event_t) &vm_page_free_count);
	{
	int	percent_avail;

	/*
	 * Decide if we need to poke the memorystatus notification thread.
	 * Locking is not a big issue, as only a single thread delivers these.
	 */
	percent_avail =
		(vm_page_active_count + vm_page_inactive_count +
		 vm_page_speculative_count + vm_page_free_count +
		 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
		atop_64(max_mem);

	if (percent_avail >= (kern_memorystatus_level + 5)) {
		kern_memorystatus_level = percent_avail;
		thread_wakeup((event_t)&kern_memorystatus_wakeup);
	}
	}
}
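
/*
 * Worked example (illustrative, not from the original source): with
 * 262,144 total pages (atop_64(max_mem)) and 40,000 active + 30,000 inactive
 * + 2,000 speculative + 10,000 free pages (purgeable ignored because a
 * default memory manager is registered),
 * percent_avail = 82,000 * 100 / 262,144 = 31.  The notification thread is
 * only poked when this figure moves at least five points past the previously
 * reported kern_memorystatus_level.
 */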
/*
 *	vm_page_wait:
 *
 *	Wait for a page to become available.
 *	If there are plenty of free pages, then we don't sleep.
 *
 *	Returns:
 *		TRUE:  There may be another page, try again
 *		FALSE: We were interrupted out of our wait, don't try again
 */
boolean_t
vm_page_wait(
	int	interruptible )
{
	/*
	 *	We can't use vm_page_free_reserved to make this
	 *	determination.  Consider: some thread might
	 *	need to allocate two pages.  The first allocation
	 *	succeeds, the second fails.  After the first page is freed,
	 *	a call to vm_page_wait must really block.
	 */
	kern_return_t	wait_result;
	int		need_wakeup = 0;
	int		is_privileged = current_thread()->options & TH_OPT_VMPRIV;

	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	if (is_privileged && vm_page_free_count) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}
	if (vm_page_free_count < vm_page_free_target) {

		if (is_privileged) {
			if (vm_page_free_wanted_privileged++ == 0)
				need_wakeup = 1;
			wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
		} else {
			if (vm_page_free_wanted++ == 0)
				need_wakeup = 1;
			wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);
		counter(c_vm_page_wait_block++);

		if (need_wakeup)
			thread_wakeup((event_t)&vm_page_free_wanted);

		if (wait_result == THREAD_WAITING)
			wait_result = thread_block(THREAD_CONTINUE_NULL);

		return(wait_result == THREAD_AWAKENED);
	} else {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		return TRUE;
	}
}
/*
 *	vm_page_alloc:
 *
 *	Allocate and return a memory cell associated
 *	with this VM object/offset pair.
 *
 *	Object must be locked.
 */
vm_page_t
vm_page_alloc(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return(mem);
}

vm_page_t
vm_page_alloclo(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grablo();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return(mem);
}
/*
 *	vm_page_alloc_guard:
 *
 *	Allocate a fictitious page which will be used
 *	as a guard page.  The page will be inserted into
 *	the object and returned to the caller.
 */
vm_page_t
vm_page_alloc_guard(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	register vm_page_t	mem;

	vm_object_lock_assert_exclusive(object);
	mem = vm_page_grab_guard();
	if (mem == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_insert(mem, object, offset);

	return(mem);
}


counter(unsigned int c_laundry_pages_freed = 0;)
/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it with any VM object.
 *
 *	Object and page queues must be locked prior to entry.
 */
static void
vm_page_free_prepare(
	register vm_page_t	mem)
{
	vm_page_free_prepare_queues(mem);
	vm_page_free_prepare_object(mem, TRUE);
}


void
vm_page_free_prepare_queues(
	vm_page_t	mem)
{
	assert(!mem->cleaning);
	assert(!mem->pageout);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (mem->free)
		panic("vm_page_free: freeing page on free list\n");

	vm_object_lock_assert_exclusive(mem->object);

	if (mem->laundry) {
		/*
		 * We may have to free a page while it's being laundered
		 * if we lost its pager (due to a forced unmount, for example).
		 * We need to call vm_pageout_throttle_up() before removing
		 * the page from its VM object, so that we can find out on
		 * which pageout queue the page is on.
		 */
		vm_pageout_throttle_up(mem);
		counter(++c_laundry_pages_freed);
	}
	VM_PAGE_QUEUES_REMOVE(mem);	/* clears local/active/inactive/throttled/speculative */

	if (VM_PAGE_WIRED(mem)) {
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
		assert(mem->object->resident_page_count >=
		       mem->object->wired_page_count);

		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		mem->wire_count = 0;
		assert(!mem->gobbled);
	} else if (mem->gobbled) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
	}
}
void
vm_page_free_prepare_object(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	}
	if (mem->tabled)
		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */

	PAGE_WAKEUP(mem);		/* clears wanted */

	if (mem->private) {
		mem->private = FALSE;
		mem->fictitious = TRUE;
		mem->phys_page = vm_page_fictitious_addr;
	}
	if (mem->fictitious) {
		/* Some of these may be unnecessary */
		mem->gobbled = FALSE;
		mem->absent = FALSE;
		mem->precious = FALSE;
		mem->reference = FALSE;
		mem->encrypted = FALSE;
		mem->encrypted_cleaning = FALSE;
		mem->pmapped = FALSE;
		mem->wpmapped = FALSE;
		mem->reusable = FALSE;
	} else {
		if (mem->zero_fill == TRUE)
			VM_ZF_COUNT_DECR();
		vm_page_init(mem, mem->phys_page, mem->lopage);
	}
}
void
vm_page_free(
	vm_page_t	mem)
{
	vm_page_free_prepare(mem);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}


void
vm_page_free_unlocked(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_lockspin_queues();
	vm_page_free_prepare_queues(mem);
	vm_page_unlock_queues();

	vm_page_free_prepare_object(mem, remove_from_hash);

	if (mem->fictitious) {
		vm_page_release_fictitious(mem);
	} else {
		vm_page_release(mem);
	}
}
/*
 *	Free a list of pages.  The list can be up to several hundred pages,
 *	as blocked up by vm_pageout_scan().
 *	The big win is not having to take the free list lock once
 *	per page.  We sort the incoming pages into n lists, one for
 *	each color.
 */
void
vm_page_free_list(
	vm_page_t	mem,
	boolean_t	prepare_object)
{
	vm_page_t	nxt;
	int		pg_count = 0;
	int		color;
	int		inuse_list_head = -1;

	queue_head_t	free_list[MAX_COLORS];
	int		inuse[MAX_COLORS];

	for (color = 0; color < (signed) vm_colors; color++) {
		queue_init(&free_list[color]);
	}

	while (mem) {
		assert(!mem->inactive);
		assert(!mem->active);
		assert(!mem->throttled);
		assert(!mem->speculative);
		assert(!VM_PAGE_WIRED(mem));
		assert(mem->pageq.prev == NULL);

		nxt = (vm_page_t)(mem->pageq.next);

		if (prepare_object == TRUE)
			vm_page_free_prepare_object(mem, TRUE);

		if (vm_page_free_verify && !mem->fictitious && !mem->private) {
			assert(pmap_verify_free(mem->phys_page));
		}

		if (!mem->fictitious) {
			if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
			    vm_lopage_free_count < vm_lopage_free_limit &&
			    mem->phys_page < max_valid_low_ppnum) {
				mem->pageq.next = NULL;
				vm_page_release(mem);
			} else {
				/*
				 * IMPORTANT: we can't set the page "free" here
				 * because that would make the page eligible for
				 * a physically-contiguous allocation (see
				 * vm_page_find_contiguous()) right away (we don't
				 * hold the vm_page_queue_free lock).  That would
				 * cause trouble because the page is not actually
				 * in the free queue yet...
				 */
				color = mem->phys_page & vm_color_mask;

				if (queue_empty(&free_list[color])) {
					inuse[color] = inuse_list_head;
					inuse_list_head = color;
				}
				queue_enter_first(&free_list[color],
						  mem,
						  vm_page_t,
						  pageq);
				pg_count++;
			}
		} else {
			assert(mem->phys_page == vm_page_fictitious_addr ||
			       mem->phys_page == vm_page_guard_addr);
			vm_page_release_fictitious(mem);
		}
		mem = nxt;
	}
	if (pg_count) {
		unsigned int	avail_free_count;
		unsigned int	need_wakeup = 0;
		unsigned int	need_priv_wakeup = 0;

		lck_mtx_lock_spin(&vm_page_queue_free_lock);

		color = inuse_list_head;

		while( color != -1 ) {
			vm_page_t first, last;
			vm_page_t first_free;

			/*
			 * Now that we hold the vm_page_queue_free lock,
			 * it's safe to mark all pages in our local queue
			 * as "free"...
			 */
			queue_iterate(&free_list[color],
				      mem,
				      vm_page_t,
				      pageq) {
				mem->free = TRUE;
			}

			/*
			 * ... and insert our local queue at the head of
			 * the global free queue.
			 */
			first = (vm_page_t) queue_first(&free_list[color]);
			last = (vm_page_t) queue_last(&free_list[color]);
			first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);

			if (queue_empty(&vm_page_queue_free[color])) {
				queue_last(&vm_page_queue_free[color]) =
					(queue_entry_t) last;
			} else {
				queue_prev(&first_free->pageq) =
					(queue_entry_t) last;
			}
			queue_first(&vm_page_queue_free[color]) =
				(queue_entry_t) first;
			queue_prev(&first->pageq) =
				(queue_entry_t) &vm_page_queue_free[color];
			queue_next(&last->pageq) =
				(queue_entry_t) first_free;

			/* next color */
			color = inuse[color];
		}
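		/*
		 * The splice above is O(1) per color: only the head/tail
		 * pointers of the two circular queues are rewritten, so the
		 * free-list lock is held for a handful of stores no matter
		 * how many pages were batched.  A minimal sketch of the same
		 * idea on a plain doubly-linked list (names here are
		 * illustrative, not from this file):
		 *
		 *	// prepend local chain [first..last] to global list g
		 *	last->next = g->head;
		 *	if (g->head)
		 *		g->head->prev = last;
		 *	else
		 *		g->tail = last;
		 *	first->prev = NULL;
		 *	g->head = first;
		 */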
		vm_page_free_count += pg_count;
		avail_free_count = vm_page_free_count;

		if (vm_page_free_wanted_privileged > 0 &&
		    avail_free_count > 0) {

			if (avail_free_count < vm_page_free_wanted_privileged) {
				need_priv_wakeup = avail_free_count;
				vm_page_free_wanted_privileged -=
					avail_free_count;
				avail_free_count = 0;
			} else {
				need_priv_wakeup = vm_page_free_wanted_privileged;
				avail_free_count -=
					vm_page_free_wanted_privileged;
				vm_page_free_wanted_privileged = 0;
			}
		}
		if (vm_page_free_wanted > 0 &&
		    avail_free_count > vm_page_free_reserved) {
			unsigned int  available_pages;

			available_pages = (avail_free_count -
					   vm_page_free_reserved);

			if (available_pages >= vm_page_free_wanted) {
				need_wakeup = vm_page_free_wanted;
				vm_page_free_wanted = 0;
			} else {
				need_wakeup = available_pages;
				vm_page_free_wanted -= available_pages;
			}
		}
		lck_mtx_unlock(&vm_page_queue_free_lock);

		if (need_priv_wakeup != 0) {
			/*
			 * There shouldn't be that many VM-privileged threads,
			 * so let's wake them all up, even if we don't quite
			 * have enough pages to satisfy them all.
			 */
			thread_wakeup((event_t)&vm_page_free_wanted_privileged);
		}
		if (need_wakeup != 0 && vm_page_free_wanted == 0) {
			/*
			 * We don't expect to have any more waiters
			 * after this, so let's wake them all up at
			 * once.
			 */
			thread_wakeup((event_t) &vm_page_free_count);
		} else for (; need_wakeup != 0; need_wakeup--) {
			/*
			 * Wake up one waiter per page we just released.
			 */
			thread_wakeup_one((event_t) &vm_page_free_count);
		}
		{
		int	percent_avail;

		/*
		 * Decide if we need to poke the memorystatus notification thread.
		 */
		percent_avail =
			(vm_page_active_count + vm_page_inactive_count +
			 vm_page_speculative_count + vm_page_free_count +
			 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
			atop_64(max_mem);

		if (percent_avail >= (kern_memorystatus_level + 5)) {
			kern_memorystatus_level = percent_avail;
			thread_wakeup((event_t)&kern_memorystatus_wakeup);
		}
		}
	}
}
/*
 *	vm_page_wire:
 *
 *	Mark this page as wired down by yet
 *	another map, removing it from paging queues
 *	as necessary.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_wire(
	register vm_page_t	mem)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 1);	/* (TEST/DEBUG) */

	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	} else {
		/*
		 * In theory, the page should be in an object before it
		 * gets wired, since we need to hold the object lock
		 * to update some fields in the page structure.
		 * However, some code (i386 pmap, for example) might want
		 * to wire a page before it gets inserted into an object.
		 * That's somewhat OK, as long as nobody else can get to
		 * that page and update it at the same time.
		 */
	}
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if ( !VM_PAGE_WIRED(mem)) {
		VM_PAGE_QUEUES_REMOVE(mem);

		if (mem->object) {
			mem->object->wired_page_count++;
			assert(mem->object->resident_page_count >=
			       mem->object->wired_page_count);
			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
				assert(vm_page_purgeable_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_count);
				OSAddAtomic(1, &vm_page_purgeable_wired_count);
			}
			if (mem->object->all_reusable) {
				/*
				 * Wired pages are not counted as "re-usable"
				 * in "all_reusable" VM objects, so nothing
				 * to do here.
				 */
			} else if (mem->reusable) {
				/*
				 * This page is not "re-usable" when it's
				 * wired, so adjust its state and the
				 * accounting.
				 */
				vm_object_reuse_pages(mem->object,
						      mem->offset,
						      mem->offset+PAGE_SIZE_64,
						      FALSE);
			}
		}
		assert(!mem->reusable);

		if (!mem->private && !mem->fictitious && !mem->gobbled)
			vm_page_wire_count++;
		if (mem->gobbled)
			vm_page_gobble_count--;
		mem->gobbled = FALSE;
		if (mem->zero_fill == TRUE) {
			mem->zero_fill = FALSE;
		}

		{
		int	percent_avail;

		/*
		 * Decide if we need to poke the memorystatus notification thread.
		 */
		percent_avail =
			(vm_page_active_count + vm_page_inactive_count +
			 vm_page_speculative_count + vm_page_free_count +
			 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
			atop_64(max_mem);

		if (percent_avail <= (kern_memorystatus_level - 5)) {
			kern_memorystatus_level = percent_avail;
			thread_wakeup((event_t)&kern_memorystatus_wakeup);
		}
		}

		/*
		 * The page could be encrypted, but
		 * we don't have to decrypt it here
		 * because we don't guarantee that the
		 * data is actually valid at this point.
		 * The page will get decrypted in
		 * vm_fault_wire() if needed.
		 */
	}
	assert(!mem->gobbled);
	mem->wire_count++;
}
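/*
 * Wiring is a counted operation: each vm_page_wire() must eventually be
 * balanced by a vm_page_unwire(), and the page only becomes pageable
 * again when wire_count drops back to zero.  A minimal sketch of the
 * usual pairing (illustrative; the locking helpers are the ones used in
 * this file, but the surrounding sequence is an assumption):
 *
 *	vm_object_lock(object);
 *	vm_page_lockspin_queues();
 *	vm_page_wire(m);		// pin the data, e.g. for device I/O
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 *
 *	... do the work that needs the physical page to stay put ...
 *
 *	vm_object_lock(object);
 *	vm_page_lock_queues();
 *	vm_page_unwire(m, TRUE);	// TRUE: put it back on a paging queue
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */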
/*
 *	vm_page_gobble:
 *
 *	Mark this page as consumed by the vm/ipc/xmm subsystems.
 *
 *	Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
 */
void
vm_page_gobble(
	register vm_page_t	mem)
{
	vm_page_lockspin_queues();

	assert(!mem->gobbled);
	assert( !VM_PAGE_WIRED(mem));

	if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count++;
	}
	vm_page_gobble_count++;
	mem->gobbled = TRUE;
	vm_page_unlock_queues();
}
/*
 *	vm_page_unwire:
 *
 *	Release one wiring of this page, potentially
 *	enabling it to be paged again.
 *
 *	The page's object and the page queues must be locked.
 */
void
vm_page_unwire(
	vm_page_t	mem,
	boolean_t	queueit)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 0);	/* (TEST/DEBUG) */

	assert(VM_PAGE_WIRED(mem));
	assert(mem->object != VM_OBJECT_NULL);

	vm_object_lock_assert_exclusive(mem->object);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if (--mem->wire_count == 0) {
		assert(!mem->private && !mem->fictitious);
		vm_page_wire_count--;
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
		assert(mem->object->resident_page_count >=
		       mem->object->wired_page_count);
		if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
			OSAddAtomic(+1, &vm_page_purgeable_count);
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		}
		assert(!mem->laundry);
		assert(mem->object != kernel_object);
		assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);

		if (queueit == TRUE) {
			if (mem->object->purgable == VM_PURGABLE_EMPTY) {
				vm_page_deactivate(mem);
			} else {
				vm_page_activate(mem);
			}
		}

		{
		int	percent_avail;

		/*
		 * Decide if we need to poke the memorystatus notification thread.
		 */
		percent_avail =
			(vm_page_active_count + vm_page_inactive_count +
			 vm_page_speculative_count + vm_page_free_count +
			 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
			atop_64(max_mem);

		if (percent_avail >= (kern_memorystatus_level + 5)) {
			kern_memorystatus_level = percent_avail;
			thread_wakeup((event_t)&kern_memorystatus_wakeup);
		}
		}
	}
}
2746 * vm_page_deactivate:
2748 * Returns the given page to the inactive list,
2749 * indicating that no physical maps have access
2750 * to this page. [Used by the physical mapping system.]
2752 * The page queues must be locked.
2758 vm_page_deactivate_internal(m
, TRUE
);
2763 vm_page_deactivate_internal(
2765 boolean_t clear_hw_reference
)
2769 assert(m
->object
!= kernel_object
);
2770 assert(m
->phys_page
!= vm_page_guard_addr
);
2772 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2774 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2777 * This page is no longer very interesting. If it was
2778 * interesting (active or inactive/referenced), then we
2779 * clear the reference bit and (re)enter it in the
2780 * inactive queue. Note wired pages should not have
2781 * their reference bit cleared.
2784 if (m
->absent
&& !m
->unusual
)
2785 panic("vm_page_deactivate: %p absent", m
);
2787 if (m
->gobbled
) { /* can this happen? */
2788 assert( !VM_PAGE_WIRED(m
));
2790 if (!m
->private && !m
->fictitious
)
2791 vm_page_wire_count
--;
2792 vm_page_gobble_count
--;
2795 if (m
->private || (VM_PAGE_WIRED(m
)))
2798 if (!m
->fictitious
&& !m
->absent
&& clear_hw_reference
== TRUE
)
2799 pmap_clear_reference(m
->phys_page
);
2801 m
->reference
= FALSE
;
2802 m
->no_cache
= FALSE
;
2805 VM_PAGE_QUEUES_REMOVE(m
);
2807 assert(!m
->laundry
);
2808 assert(m
->pageq
.next
== NULL
&& m
->pageq
.prev
== NULL
);
2810 if (!IP_VALID(memory_manager_default
) &&
2811 m
->dirty
&& m
->object
->internal
&&
2812 (m
->object
->purgable
== VM_PURGABLE_DENY
||
2813 m
->object
->purgable
== VM_PURGABLE_NONVOLATILE
||
2814 m
->object
->purgable
== VM_PURGABLE_VOLATILE
)) {
2815 queue_enter(&vm_page_queue_throttled
, m
, vm_page_t
, pageq
);
2816 m
->throttled
= TRUE
;
2817 vm_page_throttled_count
++;
2819 if (!m
->fictitious
&& m
->object
->named
&& m
->object
->ref_count
== 1) {
2820 vm_page_speculate(m
, FALSE
);
2821 #if DEVELOPMENT || DEBUG
2822 vm_page_speculative_recreated
++;
2827 queue_enter(&vm_page_queue_zf
, m
, vm_page_t
, pageq
);
2828 vm_zf_queue_count
++;
2830 queue_enter(&vm_page_queue_inactive
, m
, vm_page_t
, pageq
);
2834 if (!m
->fictitious
) {
2835 vm_page_inactive_count
++;
2836 token_new_pagecount
++;
2845 * Put the specified page on the active list (if appropriate).
2847 * The page queues must be locked.
2852 register vm_page_t m
)
2855 #ifdef FIXME_4778297
2856 assert(m
->object
!= kernel_object
);
2858 assert(m
->phys_page
!= vm_page_guard_addr
);
2860 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
2863 if (m
->absent
&& !m
->unusual
)
2864 panic("vm_page_activate: %p absent", m
);
2867 assert( !VM_PAGE_WIRED(m
));
2868 if (!m
->private && !m
->fictitious
)
2869 vm_page_wire_count
--;
2870 vm_page_gobble_count
--;
2878 panic("vm_page_activate: already active");
2881 if (m
->speculative
) {
2882 DTRACE_VM2(pgrec
, int, 1, (uint64_t *), NULL
);
2883 DTRACE_VM2(pgfrec
, int, 1, (uint64_t *), NULL
);
2886 VM_PAGE_QUEUES_REMOVE(m
);
2888 if ( !VM_PAGE_WIRED(m
)) {
2889 assert(!m
->laundry
);
2890 assert(m
->pageq
.next
== NULL
&& m
->pageq
.prev
== NULL
);
2891 if (!IP_VALID(memory_manager_default
) &&
2892 !m
->fictitious
&& m
->dirty
&& m
->object
->internal
&&
2893 (m
->object
->purgable
== VM_PURGABLE_DENY
||
2894 m
->object
->purgable
== VM_PURGABLE_NONVOLATILE
||
2895 m
->object
->purgable
== VM_PURGABLE_VOLATILE
)) {
2896 queue_enter(&vm_page_queue_throttled
, m
, vm_page_t
, pageq
);
2897 m
->throttled
= TRUE
;
2898 vm_page_throttled_count
++;
2900 queue_enter(&vm_page_queue_active
, m
, vm_page_t
, pageq
);
2903 vm_page_active_count
++;
2905 m
->reference
= TRUE
;
2906 m
->no_cache
= FALSE
;
/*
 *      vm_page_speculate:
 *
 *      Put the specified page on the speculative list (if appropriate).
 *
 *      The page queues must be locked.
 */
void
vm_page_speculate(
	vm_page_t	m,
	boolean_t	new)
{
	struct vm_speculative_age_q	*aq;

	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);

	if (m->absent && !m->unusual)
		panic("vm_page_speculate: %p absent", m);

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {
		mach_timespec_t	ts;
		clock_sec_t	sec;
		clock_nsec_t	nsec;

		clock_get_system_nanotime(&sec, &nsec);
		ts.tv_sec = (unsigned int) sec;
		ts.tv_nsec = nsec;

		if (vm_page_speculative_count == 0) {

			speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

			aq = &vm_page_queue_speculative[speculative_age_index];

			/*
			 * set the timer to begin a new group
			 */
			aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
			aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;

			ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
		} else {
			aq = &vm_page_queue_speculative[speculative_age_index];

			if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {

				speculative_age_index++;

				if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
					speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				if (speculative_age_index == speculative_steal_index) {
					speculative_steal_index = speculative_age_index + 1;

					if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
						speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				}
				aq = &vm_page_queue_speculative[speculative_age_index];

				if (!queue_empty(&aq->age_q))
					vm_page_speculate_ageit(aq);

				aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
				aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;

				ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
			}
		}
		enqueue_tail(&aq->age_q, &m->pageq);
		m->speculative = TRUE;
		vm_page_speculative_count++;

		if (new == TRUE) {
			m->object->pages_created++;
#if DEVELOPMENT || DEBUG
			vm_page_speculative_created++;
#endif
		}
	}
}
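/*
 * The aging math above just splits a millisecond interval into the
 * seconds/nanoseconds fields of a mach_timespec_t.  Worked example
 * (the 1500 ms figure is an illustration, not the actual value of
 * VM_PAGE_SPECULATIVE_Q_AGE_MS):
 *
 *	tv_sec  = 1500 / 1000                          = 1
 *	tv_nsec = (1500 % 1000) * 1000 * NSEC_PER_USEC = 500,000,000
 *
 * Adding that to the current time gives the moment at which the
 * current aging bin is considered full and a new bin is started.
 */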
3003 * move pages from the specified aging bin to
3004 * the speculative bin that pageout_scan claims from
3006 * The page queues must be locked.
3009 vm_page_speculate_ageit(struct vm_speculative_age_q
*aq
)
3011 struct vm_speculative_age_q
*sq
;
3014 sq
= &vm_page_queue_speculative
[VM_PAGE_SPECULATIVE_AGED_Q
];
3016 if (queue_empty(&sq
->age_q
)) {
3017 sq
->age_q
.next
= aq
->age_q
.next
;
3018 sq
->age_q
.prev
= aq
->age_q
.prev
;
3020 t
= (vm_page_t
)sq
->age_q
.next
;
3021 t
->pageq
.prev
= &sq
->age_q
;
3023 t
= (vm_page_t
)sq
->age_q
.prev
;
3024 t
->pageq
.next
= &sq
->age_q
;
3026 t
= (vm_page_t
)sq
->age_q
.prev
;
3027 t
->pageq
.next
= aq
->age_q
.next
;
3029 t
= (vm_page_t
)aq
->age_q
.next
;
3030 t
->pageq
.prev
= sq
->age_q
.prev
;
3032 t
= (vm_page_t
)aq
->age_q
.prev
;
3033 t
->pageq
.next
= &sq
->age_q
;
3035 sq
->age_q
.prev
= aq
->age_q
.prev
;
3037 queue_init(&aq
->age_q
);
3046 assert(m
->object
!= kernel_object
);
3047 assert(m
->phys_page
!= vm_page_guard_addr
);
3050 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
3052 if (m
->active
|| m
->reference
)
3055 if (m
->private || (VM_PAGE_WIRED(m
)))
3058 m
->no_cache
= FALSE
;
3060 VM_PAGE_QUEUES_REMOVE(m
);
3062 assert(!m
->laundry
);
3063 assert(m
->pageq
.next
== NULL
&& m
->pageq
.prev
== NULL
);
3065 queue_enter(&vm_page_queue_inactive
, m
, vm_page_t
, pageq
);
3068 vm_page_inactive_count
++;
3069 token_new_pagecount
++;
3074 vm_page_reactivate_all_throttled(void)
3076 vm_page_t first_throttled
, last_throttled
;
3077 vm_page_t first_active
;
3079 int extra_active_count
;
3081 extra_active_count
= 0;
3082 vm_page_lock_queues();
3083 if (! queue_empty(&vm_page_queue_throttled
)) {
3085 * Switch "throttled" pages to "active".
3087 queue_iterate(&vm_page_queue_throttled
, m
, vm_page_t
, pageq
) {
3089 assert(m
->throttled
);
3091 assert(!m
->inactive
);
3092 assert(!m
->speculative
);
3093 assert(!VM_PAGE_WIRED(m
));
3094 if (!m
->fictitious
) {
3095 extra_active_count
++;
3097 m
->throttled
= FALSE
;
3103 * Transfer the entire throttled queue to a regular LRU page queues.
3104 * We insert it at the head of the active queue, so that these pages
3105 * get re-evaluated by the LRU algorithm first, since they've been
3106 * completely out of it until now.
3108 first_throttled
= (vm_page_t
) queue_first(&vm_page_queue_throttled
);
3109 last_throttled
= (vm_page_t
) queue_last(&vm_page_queue_throttled
);
3110 first_active
= (vm_page_t
) queue_first(&vm_page_queue_active
);
3111 if (queue_empty(&vm_page_queue_active
)) {
3112 queue_last(&vm_page_queue_active
) = (queue_entry_t
) last_throttled
;
3114 queue_prev(&first_active
->pageq
) = (queue_entry_t
) last_throttled
;
3116 queue_first(&vm_page_queue_active
) = (queue_entry_t
) first_throttled
;
3117 queue_prev(&first_throttled
->pageq
) = (queue_entry_t
) &vm_page_queue_active
;
3118 queue_next(&last_throttled
->pageq
) = (queue_entry_t
) first_active
;
3121 printf("reactivated %d throttled pages\n", vm_page_throttled_count
);
3123 queue_init(&vm_page_queue_throttled
);
3125 * Adjust the global page counts.
3127 vm_page_active_count
+= extra_active_count
;
3128 vm_page_throttled_count
= 0;
3130 assert(vm_page_throttled_count
== 0);
3131 assert(queue_empty(&vm_page_queue_throttled
));
3132 vm_page_unlock_queues();
3137 * move pages from the indicated local queue to the global active queue
3138 * its ok to fail if we're below the hard limit and force == FALSE
3139 * the nolocks == TRUE case is to allow this function to be run on
3140 * the hibernate path
3144 vm_page_reactivate_local(uint32_t lid
, boolean_t force
, boolean_t nolocks
)
3147 vm_page_t first_local
, last_local
;
3148 vm_page_t first_active
;
3152 if (vm_page_local_q
== NULL
)
3155 lq
= &vm_page_local_q
[lid
].vpl_un
.vpl
;
3157 if (nolocks
== FALSE
) {
3158 if (lq
->vpl_count
< vm_page_local_q_hard_limit
&& force
== FALSE
) {
3159 if ( !vm_page_trylockspin_queues())
3162 vm_page_lockspin_queues();
3164 VPL_LOCK(&lq
->vpl_lock
);
3166 if (lq
->vpl_count
) {
3168 * Switch "local" pages to "active".
3170 assert(!queue_empty(&lq
->vpl_queue
));
3172 queue_iterate(&lq
->vpl_queue
, m
, vm_page_t
, pageq
) {
3176 assert(!m
->inactive
);
3177 assert(!m
->speculative
);
3178 assert(!VM_PAGE_WIRED(m
));
3179 assert(!m
->throttled
);
3180 assert(!m
->fictitious
);
3182 if (m
->local_id
!= lid
)
3183 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m
);
3192 if (count
!= lq
->vpl_count
)
3193 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count
, lq
->vpl_count
);
3196 * Transfer the entire local queue to a regular LRU page queues.
3198 first_local
= (vm_page_t
) queue_first(&lq
->vpl_queue
);
3199 last_local
= (vm_page_t
) queue_last(&lq
->vpl_queue
);
3200 first_active
= (vm_page_t
) queue_first(&vm_page_queue_active
);
3202 if (queue_empty(&vm_page_queue_active
)) {
3203 queue_last(&vm_page_queue_active
) = (queue_entry_t
) last_local
;
3205 queue_prev(&first_active
->pageq
) = (queue_entry_t
) last_local
;
3207 queue_first(&vm_page_queue_active
) = (queue_entry_t
) first_local
;
3208 queue_prev(&first_local
->pageq
) = (queue_entry_t
) &vm_page_queue_active
;
3209 queue_next(&last_local
->pageq
) = (queue_entry_t
) first_active
;
3211 queue_init(&lq
->vpl_queue
);
3213 * Adjust the global page counts.
3215 vm_page_active_count
+= lq
->vpl_count
;
3218 assert(queue_empty(&lq
->vpl_queue
));
3220 if (nolocks
== FALSE
) {
3221 VPL_UNLOCK(&lq
->vpl_lock
);
3222 vm_page_unlock_queues();
3227 * vm_page_part_zero_fill:
3229 * Zero-fill a part of the page.
3232 vm_page_part_zero_fill(
3240 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3241 pmap_zero_part_page(m
->phys_page
, m_pa
, len
);
3244 tmp
= vm_page_grab();
3245 if (tmp
== VM_PAGE_NULL
) {
3246 vm_page_wait(THREAD_UNINT
);
3251 vm_page_zero_fill(tmp
);
3253 vm_page_part_copy(m
, 0, tmp
, 0, m_pa
);
3255 if((m_pa
+ len
) < PAGE_SIZE
) {
3256 vm_page_part_copy(m
, m_pa
+ len
, tmp
,
3257 m_pa
+ len
, PAGE_SIZE
- (m_pa
+ len
));
3259 vm_page_copy(tmp
,m
);
3266 * vm_page_zero_fill:
3268 * Zero-fill the specified page.
3275 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3276 m
->object
, m
->offset
, m
, 0,0);
3280 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3281 pmap_zero_page(m
->phys_page
);
3285 * vm_page_part_copy:
3287 * copy part of one page to another
3298 VM_PAGE_CHECK(src_m
);
3299 VM_PAGE_CHECK(dst_m
);
3301 pmap_copy_part_page(src_m
->phys_page
, src_pa
,
3302 dst_m
->phys_page
, dst_pa
, len
);
3308 * Copy one page to another
3311 * The source page should not be encrypted. The caller should
3312 * make sure the page is decrypted first, if necessary.
3315 int vm_page_copy_cs_validations
= 0;
3316 int vm_page_copy_cs_tainted
= 0;
3324 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3325 src_m
->object
, src_m
->offset
,
3326 dest_m
->object
, dest_m
->offset
,
3329 VM_PAGE_CHECK(src_m
);
3330 VM_PAGE_CHECK(dest_m
);
3334 * The source page should not be encrypted at this point.
3335 * The destination page will therefore not contain encrypted
3336 * data after the copy.
3338 if (src_m
->encrypted
) {
3339 panic("vm_page_copy: source page %p is encrypted\n", src_m
);
3341 dest_m
->encrypted
= FALSE
;
3343 if (src_m
->object
!= VM_OBJECT_NULL
&&
3344 src_m
->object
->code_signed
) {
3346 * We're copying a page from a code-signed object.
3347 * Whoever ends up mapping the copy page might care about
3348 * the original page's integrity, so let's validate the
3351 vm_page_copy_cs_validations
++;
3352 vm_page_validate_cs(src_m
);
3355 * Propagate the cs_tainted bit to the copy page. Do not propagate
3356 * the cs_validated bit.
3358 dest_m
->cs_tainted
= src_m
->cs_tainted
;
3359 if (dest_m
->cs_tainted
) {
3360 vm_page_copy_cs_tainted
++;
3363 pmap_copy_page(src_m
->phys_page
, dest_m
->phys_page
);
3371 printf("vm_page %p: \n", p
);
3372 printf(" pageq: next=%p prev=%p\n", p
->pageq
.next
, p
->pageq
.prev
);
3373 printf(" listq: next=%p prev=%p\n", p
->listq
.next
, p
->listq
.prev
);
3374 printf(" next=%p\n", p
->next
);
3375 printf(" object=%p offset=0x%llx\n", p
->object
, p
->offset
);
3376 printf(" wire_count=%u\n", p
->wire_count
);
3378 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3379 (p
->local
? "" : "!"),
3380 (p
->inactive
? "" : "!"),
3381 (p
->active
? "" : "!"),
3382 (p
->pageout_queue
? "" : "!"),
3383 (p
->speculative
? "" : "!"),
3384 (p
->laundry
? "" : "!"));
3385 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3386 (p
->free
? "" : "!"),
3387 (p
->reference
? "" : "!"),
3388 (p
->gobbled
? "" : "!"),
3389 (p
->private ? "" : "!"),
3390 (p
->throttled
? "" : "!"));
3391 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3392 (p
->busy
? "" : "!"),
3393 (p
->wanted
? "" : "!"),
3394 (p
->tabled
? "" : "!"),
3395 (p
->fictitious
? "" : "!"),
3396 (p
->pmapped
? "" : "!"),
3397 (p
->wpmapped
? "" : "!"));
3398 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3399 (p
->pageout
? "" : "!"),
3400 (p
->absent
? "" : "!"),
3401 (p
->error
? "" : "!"),
3402 (p
->dirty
? "" : "!"),
3403 (p
->cleaning
? "" : "!"),
3404 (p
->precious
? "" : "!"),
3405 (p
->clustered
? "" : "!"));
3406 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3407 (p
->overwriting
? "" : "!"),
3408 (p
->restart
? "" : "!"),
3409 (p
->unusual
? "" : "!"),
3410 (p
->encrypted
? "" : "!"),
3411 (p
->encrypted_cleaning
? "" : "!"));
3412 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3413 (p
->list_req_pending
? "" : "!"),
3414 (p
->dump_cleaning
? "" : "!"),
3415 (p
->cs_validated
? "" : "!"),
3416 (p
->cs_tainted
? "" : "!"),
3417 (p
->no_cache
? "" : "!"));
3418 printf(" %szero_fill\n",
3419 (p
->zero_fill
? "" : "!"));
3421 printf("phys_page=0x%x\n", p
->phys_page
);
3425 * Check that the list of pages is ordered by
3426 * ascending physical address and has no holes.
3429 vm_page_verify_contiguous(
3431 unsigned int npages
)
3433 register vm_page_t m
;
3434 unsigned int page_count
;
3435 vm_offset_t prev_addr
;
3437 prev_addr
= pages
->phys_page
;
3439 for (m
= NEXT_PAGE(pages
); m
!= VM_PAGE_NULL
; m
= NEXT_PAGE(m
)) {
3440 if (m
->phys_page
!= prev_addr
+ 1) {
3441 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3442 m
, (long)prev_addr
, m
->phys_page
);
3443 printf("pages %p page_count %d\n", pages
, page_count
);
3444 panic("vm_page_verify_contiguous: not contiguous!");
3446 prev_addr
= m
->phys_page
;
3449 if (page_count
!= npages
) {
3450 printf("pages %p actual count 0x%x but requested 0x%x\n",
3451 pages
, page_count
, npages
);
3452 panic("vm_page_verify_contiguous: count error");
3459 * Check the free lists for proper length etc.
3462 vm_page_verify_free_list(
3463 queue_head_t
*vm_page_queue
,
3465 vm_page_t look_for_page
,
3466 boolean_t expect_page
)
3468 unsigned int npages
;
3471 boolean_t found_page
;
3475 prev_m
= (vm_page_t
) vm_page_queue
;
3476 queue_iterate(vm_page_queue
,
3480 if (m
== look_for_page
) {
3483 if ((vm_page_t
) m
->pageq
.prev
!= prev_m
)
3484 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3485 color
, npages
, m
, m
->pageq
.prev
, prev_m
);
3487 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3490 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3492 if ( color
!= (unsigned int) -1 && (m
->phys_page
& vm_color_mask
) != color
)
3493 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3494 color
, npages
, m
, m
->phys_page
& vm_color_mask
, color
);
3498 if (look_for_page
!= VM_PAGE_NULL
) {
3499 unsigned int other_color
;
3501 if (expect_page
&& !found_page
) {
3502 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3503 color
, npages
, look_for_page
, look_for_page
->phys_page
);
3504 _vm_page_print(look_for_page
);
3505 for (other_color
= 0;
3506 other_color
< vm_colors
;
3508 if (other_color
== color
)
3510 vm_page_verify_free_list(&vm_page_queue_free
[other_color
],
3511 other_color
, look_for_page
, FALSE
);
3513 if (color
!= (unsigned int) -1) {
3514 vm_page_verify_free_list(&vm_lopage_queue_free
,
3515 (unsigned int) -1, look_for_page
, FALSE
);
3518 panic("vm_page_verify_free_list(color=%u)\n", color
);
3520 if (!expect_page
&& found_page
) {
3521 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3522 color
, npages
, look_for_page
, look_for_page
->phys_page
);
3528 static boolean_t vm_page_verify_free_lists_enabled
= FALSE
;
3530 vm_page_verify_free_lists( void )
3532 unsigned int color
, npages
, nlopages
;
3534 if (! vm_page_verify_free_lists_enabled
)
3539 lck_mtx_lock(&vm_page_queue_free_lock
);
3541 for( color
= 0; color
< vm_colors
; color
++ ) {
3542 npages
+= vm_page_verify_free_list(&vm_page_queue_free
[color
],
3543 color
, VM_PAGE_NULL
, FALSE
);
3546 nlopages
= vm_page_verify_free_list(&vm_lopage_queue_free
,
3548 VM_PAGE_NULL
, FALSE
);
3549 if (npages
!= vm_page_free_count
|| nlopages
!= vm_lopage_free_count
)
3550 panic("vm_page_verify_free_lists: "
3551 "npages %u free_count %d nlopages %u lo_free_count %u",
3552 npages
, vm_page_free_count
, nlopages
, vm_lopage_free_count
);
3553 lck_mtx_unlock(&vm_page_queue_free_lock
);
3557 vm_page_queues_assert(
3561 if (mem
->free
+ mem
->active
+ mem
->inactive
+ mem
->speculative
+
3562 mem
->throttled
+ mem
->pageout_queue
> (val
)) {
3563 _vm_page_print(mem
);
3564 panic("vm_page_queues_assert(%p, %d)\n", mem
, val
);
3566 if (VM_PAGE_WIRED(mem
)) {
3567 assert(!mem
->active
);
3568 assert(!mem
->inactive
);
3569 assert(!mem
->speculative
);
3570 assert(!mem
->throttled
);
3573 #endif /* MACH_ASSERT */
/*
 *	CONTIGUOUS PAGE ALLOCATION
 *
 *	Find a region large enough to contain at least n pages
 *	of contiguous physical memory.
 *
 *	This is done by traversing the vm_page_t array in a linear fashion;
 *	we assume that the vm_page_t array has the available physical pages in an
 *	ordered, ascending list... this is currently true of all our implementations
 *	and must remain so... there can be 'holes' in the array... we also can
 *	no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
 *	which used to happen via 'vm_page_convert'... that function was no longer
 *	being called and was removed...
 *
 *	The basic flow consists of stabilizing some of the interesting state of
 *	a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 *	sweep at the beginning of the array looking for pages that meet our criteria
 *	for a 'stealable' page... currently we are pretty conservative... if the page
 *	meets this criteria and is physically contiguous to the previous page in the 'run',
 *	we keep developing it.  If we hit a page that doesn't fit, we reset our state
 *	and start to develop a new run... if at this point we've already considered
 *	at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 *	and mutex_pause (which will yield the processor), to keep the latency low w/r
 *	to other threads trying to acquire free pages (or move pages from q to q),
 *	and then continue from the spot we left off... we only make 1 pass through the
 *	array.  Once we have a 'run' that is long enough, we'll go into the loop
 *	which steals the pages from the queues they're currently on... pages on the free
 *	queue can be stolen directly... pages that are on any of the other queues
 *	must be removed from the object they are tabled on... this requires taking the
 *	object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 *	or if the state of the page behind the vm_object lock is no longer viable, we'll
 *	dump the pages we've currently stolen back to the free list, and pick up our
 *	scan from the point where we aborted the 'current' run.
 *
 *	- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 *	Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 */
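/*
 * Stripped-down sketch of the scan described above (illustrative
 * pseudocode only; it omits locking, yielding, alignment/low-memory
 * constraints and the page-substitution path handled by the real code
 * below; page_is_stealable() is a hypothetical predicate standing in
 * for the long list of state checks):
 *
 *	npages = 0;
 *	prevcontaddr = -2;
 *	for (page_idx = start; page_idx < vm_pages_count; page_idx++) {
 *		m = &vm_pages[page_idx];
 *
 *		if (!page_is_stealable(m)) {
 *			npages = 0;			// reset the run
 *		} else if (m->phys_page != prevcontaddr + 1) {
 *			npages = 1;			// new run starts here
 *			start_idx = page_idx;
 *		} else {
 *			npages++;			// run keeps growing
 *		}
 *		prevcontaddr = m->phys_page;
 *		if (npages == contig_pages)
 *			break;				// long enough: go steal it
 *	}
 */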
#define	MAX_CONSIDERED_BEFORE_YIELD	1000


#define RESET_STATE_OF_RUN()	\
	MACRO_BEGIN		\
	prevcontaddr = -2;	\
	free_considered = 0;	\
	substitute_needed = 0;	\
	npages = 0;		\
	MACRO_END

/*
 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages ?
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL	1

static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
int vm_page_find_contig_debug = 0;
3643 vm_page_find_contiguous(
3644 unsigned int contig_pages
,
3651 ppnum_t prevcontaddr
;
3653 unsigned int npages
, considered
, scanned
;
3654 unsigned int page_idx
, start_idx
, last_idx
, orig_last_idx
;
3655 unsigned int idx_last_contig_page_found
= 0;
3656 int free_considered
, free_available
;
3657 int substitute_needed
;
3660 clock_sec_t tv_start_sec
, tv_end_sec
;
3661 clock_usec_t tv_start_usec
, tv_end_usec
;
3666 int stolen_pages
= 0;
3669 if (contig_pages
== 0)
3670 return VM_PAGE_NULL
;
3673 vm_page_verify_free_lists();
3676 clock_get_system_microtime(&tv_start_sec
, &tv_start_usec
);
3678 vm_page_lock_queues();
3679 lck_mtx_lock(&vm_page_queue_free_lock
);
3681 RESET_STATE_OF_RUN();
3685 free_available
= vm_page_free_count
- vm_page_free_reserved
;
3689 if(flags
& KMA_LOMEM
)
3690 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
;
3692 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
;
3694 orig_last_idx
= idx_last_contig_page_found
;
3695 last_idx
= orig_last_idx
;
3697 for (page_idx
= last_idx
, start_idx
= last_idx
;
3698 npages
< contig_pages
&& page_idx
< vm_pages_count
;
3703 page_idx
>= orig_last_idx
) {
3705 * We're back where we started and we haven't
3706 * found any suitable contiguous range. Let's
3712 m
= &vm_pages
[page_idx
];
3714 assert(!m
->fictitious
);
3715 assert(!m
->private);
3717 if (max_pnum
&& m
->phys_page
> max_pnum
) {
3718 /* no more low pages... */
3721 if (!npages
&& ((m
->phys_page
& pnum_mask
) != 0)) {
3725 RESET_STATE_OF_RUN();
3727 } else if (VM_PAGE_WIRED(m
) || m
->gobbled
||
3728 m
->encrypted
|| m
->encrypted_cleaning
|| m
->cs_validated
|| m
->cs_tainted
||
3729 m
->error
|| m
->absent
|| m
->pageout_queue
|| m
->laundry
|| m
->wanted
|| m
->precious
||
3730 m
->cleaning
|| m
->overwriting
|| m
->restart
|| m
->unusual
|| m
->list_req_pending
||
3733 * page is in a transient state
3734 * or a state we don't want to deal
3735 * with, so don't consider it which
3736 * means starting a new run
3738 RESET_STATE_OF_RUN();
3740 } else if (!m
->free
&& !m
->active
&& !m
->inactive
&& !m
->speculative
&& !m
->throttled
) {
3742 * page needs to be on one of our queues
3743 * in order for it to be stable behind the
3744 * locks we hold at this point...
3745 * if not, don't consider it which
3746 * means starting a new run
3748 RESET_STATE_OF_RUN();
3750 } else if (!m
->free
&& (!m
->tabled
|| m
->busy
)) {
3752 * pages on the free list are always 'busy'
3753 * so we couldn't test for 'busy' in the check
3754 * for the transient states... pages that are
3755 * 'free' are never 'tabled', so we also couldn't
3756 * test for 'tabled'. So we check here to make
3757 * sure that a non-free page is not busy and is
3758 * tabled on an object...
3759 * if not, don't consider it which
3760 * means starting a new run
3762 RESET_STATE_OF_RUN();
3765 if (m
->phys_page
!= prevcontaddr
+ 1) {
3766 if ((m
->phys_page
& pnum_mask
) != 0) {
3767 RESET_STATE_OF_RUN();
3771 start_idx
= page_idx
;
3772 start_pnum
= m
->phys_page
;
3777 prevcontaddr
= m
->phys_page
;
3784 * This page is not free.
3785 * If we can't steal used pages,
3786 * we have to give up this run
3788 * Otherwise, we might need to
3789 * move the contents of this page
3790 * into a substitute page.
3792 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3793 if (m
->pmapped
|| m
->dirty
) {
3794 substitute_needed
++;
3797 RESET_STATE_OF_RUN();
3801 if ((free_considered
+ substitute_needed
) > free_available
) {
3803 * if we let this run continue
3804 * we will end up dropping the vm_page_free_count
3805 * below the reserve limit... we need to abort
3806 * this run, but we can at least re-consider this
3807 * page... thus the jump back to 'retry'
3809 RESET_STATE_OF_RUN();
3811 if (free_available
&& considered
<= MAX_CONSIDERED_BEFORE_YIELD
) {
3816 * free_available == 0
3817 * so can't consider any free pages... if
3818 * we went to retry in this case, we'd
3819 * get stuck looking at the same page
3820 * w/o making any forward progress
3821 * we also want to take this path if we've already
3822 * reached our limit that controls the lock latency
3827 if (considered
> MAX_CONSIDERED_BEFORE_YIELD
&& npages
<= 1) {
3829 lck_mtx_unlock(&vm_page_queue_free_lock
);
3830 vm_page_unlock_queues();
3834 vm_page_lock_queues();
3835 lck_mtx_lock(&vm_page_queue_free_lock
);
3837 RESET_STATE_OF_RUN();
3839 * reset our free page limit since we
3840 * dropped the lock protecting the vm_page_free_queue
3842 free_available
= vm_page_free_count
- vm_page_free_reserved
;
3853 if (npages
!= contig_pages
) {
3856 * We didn't find a contiguous range but we didn't
3857 * start from the very first page.
3858 * Start again from the very first page.
3860 RESET_STATE_OF_RUN();
3861 if( flags
& KMA_LOMEM
)
3862 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
= 0;
3864 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
= 0;
3866 page_idx
= last_idx
;
3870 lck_mtx_unlock(&vm_page_queue_free_lock
);
3874 unsigned int cur_idx
;
3875 unsigned int tmp_start_idx
;
3876 vm_object_t locked_object
= VM_OBJECT_NULL
;
3877 boolean_t abort_run
= FALSE
;
3879 assert(page_idx
- start_idx
== contig_pages
);
3881 tmp_start_idx
= start_idx
;
3884 * first pass through to pull the free pages
3885 * off of the free queue so that in case we
3886 * need substitute pages, we won't grab any
3887 * of the free pages in the run... we'll clear
3888 * the 'free' bit in the 2nd pass, and even in
3889 * an abort_run case, we'll collect all of the
3890 * free pages in this run and return them to the free list
3892 while (start_idx
< page_idx
) {
3894 m1
= &vm_pages
[start_idx
++];
3896 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3903 color
= m1
->phys_page
& vm_color_mask
;
3905 vm_page_verify_free_list(&vm_page_queue_free
[color
],
3908 queue_remove(&vm_page_queue_free
[color
],
3912 m1
->pageq
.next
= NULL
;
3913 m1
->pageq
.prev
= NULL
;
3915 vm_page_verify_free_list(&vm_page_queue_free
[color
],
3916 color
, VM_PAGE_NULL
, FALSE
);
3919 * Clear the "free" bit so that this page
3920 * does not get considered for another
3921 * concurrent physically-contiguous allocation.
3926 vm_page_free_count
--;
3930 * adjust global freelist counts
3932 if (vm_page_free_count
< vm_page_free_count_minimum
)
3933 vm_page_free_count_minimum
= vm_page_free_count
;
3935 if( flags
& KMA_LOMEM
)
3936 vm_page_lomem_find_contiguous_last_idx
= page_idx
;
3938 vm_page_find_contiguous_last_idx
= page_idx
;
3941 * we can drop the free queue lock at this point since
3942 * we've pulled any 'free' candidates off of the list
3943 * we need it dropped so that we can do a vm_page_grab
3944 * when substituing for pmapped/dirty pages
3946 lck_mtx_unlock(&vm_page_queue_free_lock
);
3948 start_idx
= tmp_start_idx
;
3949 cur_idx
= page_idx
- 1;
3951 while (start_idx
++ < page_idx
) {
3953 * must go through the list from back to front
3954 * so that the page list is created in the
3955 * correct order - low -> high phys addresses
3957 m1
= &vm_pages
[cur_idx
--];
3960 if (m1
->object
== VM_OBJECT_NULL
) {
3962 * page has already been removed from
3963 * the free list in the 1st pass
3965 assert(m1
->offset
== (vm_object_offset_t
) -1);
3967 assert(!m1
->wanted
);
3968 assert(!m1
->laundry
);
3972 if (abort_run
== TRUE
)
3975 object
= m1
->object
;
3977 if (object
!= locked_object
) {
3978 if (locked_object
) {
3979 vm_object_unlock(locked_object
);
3980 locked_object
= VM_OBJECT_NULL
;
3982 if (vm_object_lock_try(object
))
3983 locked_object
= object
;
3985 if (locked_object
== VM_OBJECT_NULL
||
3986 (VM_PAGE_WIRED(m1
) || m1
->gobbled
||
3987 m1
->encrypted
|| m1
->encrypted_cleaning
|| m1
->cs_validated
|| m1
->cs_tainted
||
3988 m1
->error
|| m1
->absent
|| m1
->pageout_queue
|| m1
->laundry
|| m1
->wanted
|| m1
->precious
||
3989 m1
->cleaning
|| m1
->overwriting
|| m1
->restart
|| m1
->unusual
|| m1
->list_req_pending
|| m1
->busy
)) {
3991 if (locked_object
) {
3992 vm_object_unlock(locked_object
);
3993 locked_object
= VM_OBJECT_NULL
;
3995 tmp_start_idx
= cur_idx
;
3999 if (m1
->pmapped
|| m1
->dirty
) {
4001 vm_object_offset_t offset
;
4003 m2
= vm_page_grab();
4005 if (m2
== VM_PAGE_NULL
) {
4006 if (locked_object
) {
4007 vm_object_unlock(locked_object
);
4008 locked_object
= VM_OBJECT_NULL
;
4010 tmp_start_idx
= cur_idx
;
4015 refmod
= pmap_disconnect(m1
->phys_page
);
4018 vm_page_copy(m1
, m2
);
4020 m2
->reference
= m1
->reference
;
4021 m2
->dirty
= m1
->dirty
;
4023 if (refmod
& VM_MEM_REFERENCED
)
4024 m2
->reference
= TRUE
;
4025 if (refmod
& VM_MEM_MODIFIED
)
4027 offset
= m1
->offset
;
4030 * completely cleans up the state
4031 * of the page so that it is ready
4032 * to be put onto the free list, or
4033 * for this purpose it looks like it
4034 * just came off of the free list
4036 vm_page_free_prepare(m1
);
4039 * make sure we clear the ref/mod state
4040 * from the pmap layer... else we risk
4041 * inheriting state from the last time
4042 * this page was used...
4044 pmap_clear_refmod(m2
->phys_page
, VM_MEM_MODIFIED
| VM_MEM_REFERENCED
);
4046 * now put the substitute page on the object
4048 vm_page_insert_internal(m2
, locked_object
, offset
, TRUE
, TRUE
);
4051 vm_page_activate(m2
);
4053 vm_page_deactivate(m2
);
4055 PAGE_WAKEUP_DONE(m2
);
4059 * completely cleans up the state
4060 * of the page so that it is ready
4061 * to be put onto the free list, or
4062 * for this purpose it looks like it
4063 * just came off of the free list
4065 vm_page_free_prepare(m1
);
4071 m1
->pageq
.next
= (queue_entry_t
) m
;
4072 m1
->pageq
.prev
= NULL
;
4075 if (locked_object
) {
4076 vm_object_unlock(locked_object
);
4077 locked_object
= VM_OBJECT_NULL
;
4080 if (abort_run
== TRUE
) {
4081 if (m
!= VM_PAGE_NULL
) {
4082 vm_page_free_list(m
, FALSE
);
4088 * want the index of the last
4089 * page in this run that was
4090 * successfully 'stolen', so back
4091 * it up 1 for the auto-decrement on use
4092 * and 1 more to bump back over this page
4094 page_idx
= tmp_start_idx
+ 2;
4095 if (page_idx
>= vm_pages_count
) {
4098 page_idx
= last_idx
= 0;
4104 * We didn't find a contiguous range but we didn't
4105 * start from the very first page.
4106 * Start again from the very first page.
4108 RESET_STATE_OF_RUN();
4110 if( flags
& KMA_LOMEM
)
4111 idx_last_contig_page_found
= vm_page_lomem_find_contiguous_last_idx
= page_idx
;
4113 idx_last_contig_page_found
= vm_page_find_contiguous_last_idx
= page_idx
;
4115 last_idx
= page_idx
;
4117 lck_mtx_lock(&vm_page_queue_free_lock
);
4119 * reset our free page limit since we
4120 * dropped the lock protecting the vm_page_free_queue
4122 free_available
= vm_page_free_count
- vm_page_free_reserved
;
4126 for (m1
= m
; m1
!= VM_PAGE_NULL
; m1
= NEXT_PAGE(m1
)) {
4134 vm_page_gobble_count
+= npages
;
4137 * gobbled pages are also counted as wired pages
4139 vm_page_wire_count
+= npages
;
4141 assert(vm_page_verify_contiguous(m
, npages
));
4144 vm_page_unlock_queues();
4147 clock_get_system_microtime(&tv_end_sec
, &tv_end_usec
);
4149 tv_end_sec
-= tv_start_sec
;
4150 if (tv_end_usec
< tv_start_usec
) {
4152 tv_end_usec
+= 1000000;
4154 tv_end_usec
-= tv_start_usec
;
4155 if (tv_end_usec
>= 1000000) {
4157 tv_end_sec
-= 1000000;
4159 if (vm_page_find_contig_debug
) {
4160 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4161 __func__
, contig_pages
, max_pnum
, npages
, (vm_object_offset_t
)start_pnum
<< PAGE_SHIFT
,
4162 (long)tv_end_sec
, tv_end_usec
, orig_last_idx
,
4163 scanned
, yielded
, dumped_run
, stolen_pages
);
4168 vm_page_verify_free_lists();
/*
 *	Allocate a list of contiguous, wired pages.
 */
kern_return_t
cpm_allocate(
	vm_size_t	size,
	vm_page_t	*list,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t		pages;
	unsigned int		npages;

	if (size % page_size != 0)
		return KERN_INVALID_ARGUMENT;

	npages = (unsigned int) (size / PAGE_SIZE);
	if (npages != size / PAGE_SIZE) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Obtain a pointer to a subset of the free
	 *	list large enough to satisfy the request;
	 *	the region will be physically contiguous.
	 */
	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

	if (pages == VM_PAGE_NULL)
		return KERN_NO_SPACE;
	/*
	 * determine need for wakeups
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	    ((vm_page_free_count < vm_page_free_target) &&
	     ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
		thread_wakeup((event_t) &vm_page_free_wanted);

	{
	int	percent_avail;

	/*
	 * Decide if we need to poke the memorystatus notification thread.
	 */
	percent_avail =
		(vm_page_active_count + vm_page_inactive_count +
		 vm_page_speculative_count + vm_page_free_count +
		 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
		atop_64(max_mem);

	if (percent_avail <= (kern_memorystatus_level - 5)) {
		kern_memorystatus_level = percent_avail;
		thread_wakeup((event_t)&kern_memorystatus_wakeup);
	}
	}

	/*
	 *	The CPM pages should now be available and
	 *	ordered by ascending physical address.
	 */
	assert(vm_page_verify_contiguous(pages, npages));

	*list = pages;

	return KERN_SUCCESS;
}
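/*
 * Example call (illustrative; the caller, the 16 MB size and the error
 * handling are assumptions, not code from this file): grab a physically
 * contiguous, wired run of pages anywhere in memory, with no particular
 * alignment, e.g. for a large DMA buffer.
 *
 *	vm_page_t	page_list;
 *	kern_return_t	kr;
 *
 *	kr = cpm_allocate(16 * 1024 * 1024,	// size, must be page aligned
 *			  &page_list,		// returns the page run
 *			  0,			// max_pnum: no upper bound
 *			  0,			// pnum_mask: no alignment
 *			  TRUE,			// wire the pages
 *			  0);			// flags
 *	if (kr != KERN_SUCCESS)
 *		... fall back to a non-contiguous allocation ...
 */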
4249 vm_page_t lo_page_list
= VM_PAGE_NULL
;
4253 if ( !(flags
& KMA_LOMEM
))
4254 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4256 for (i
= 0; i
< page_count
; i
++) {
4258 mem
= vm_page_grablo();
4260 if (mem
== VM_PAGE_NULL
) {
4262 vm_page_free_list(lo_page_list
, FALSE
);
4264 *list
= VM_PAGE_NULL
;
4266 return (KERN_RESOURCE_SHORTAGE
);
4268 mem
->pageq
.next
= (queue_entry_t
) lo_page_list
;
4271 *list
= lo_page_list
;
4273 return (KERN_SUCCESS
);
4277 vm_page_set_offset(vm_page_t page
, vm_object_offset_t offset
)
4279 page
->offset
= offset
;
4283 vm_page_get_next(vm_page_t page
)
4285 return ((vm_page_t
) page
->pageq
.next
);
4289 vm_page_get_offset(vm_page_t page
)
4291 return (page
->offset
);
4295 vm_page_get_phys_page(vm_page_t page
)
4297 return (page
->phys_page
);
4301 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4305 static vm_page_t hibernate_gobble_queue
;
4307 extern boolean_t (* volatile consider_buffer_cache_collect
)(int);
4309 static int hibernate_drain_pageout_queue(struct vm_pageout_queue
*);
4310 static int hibernate_flush_dirty_pages(void);
4311 static int hibernate_flush_queue(queue_head_t
*, int);
4312 static void hibernate_dirty_page(vm_page_t
);
4314 void hibernate_flush_wait(void);
4315 void hibernate_mark_in_progress(void);
4316 void hibernate_clear_in_progress(void);
4319 struct hibernate_statistics
{
4320 int hibernate_considered
;
4321 int hibernate_reentered_on_q
;
4322 int hibernate_found_dirty
;
4323 int hibernate_skipped_cleaning
;
4324 int hibernate_skipped_transient
;
4325 int hibernate_skipped_precious
;
4326 int hibernate_queue_nolock
;
4327 int hibernate_queue_paused
;
4328 int hibernate_throttled
;
4329 int hibernate_throttle_timeout
;
4330 int hibernate_drained
;
4331 int hibernate_drain_timeout
;
4333 int cd_found_precious
;
4336 int cd_found_unusual
;
4337 int cd_found_cleaning
;
4338 int cd_found_laundry
;
4342 int cd_vm_page_wire_count
;
4351 hibernate_drain_pageout_queue(struct vm_pageout_queue
*q
)
4353 wait_result_t wait_result
;
4355 vm_page_lock_queues();
4357 while (q
->pgo_laundry
) {
4359 q
->pgo_draining
= TRUE
;
4361 assert_wait_timeout((event_t
) (&q
->pgo_laundry
+1), THREAD_INTERRUPTIBLE
, 5000, 1000*NSEC_PER_USEC
);
4363 vm_page_unlock_queues();
4365 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
4367 if (wait_result
== THREAD_TIMED_OUT
) {
4368 hibernate_stats
.hibernate_drain_timeout
++;
4371 vm_page_lock_queues();
4373 hibernate_stats
.hibernate_drained
++;
4375 vm_page_unlock_queues();
4381 hibernate_dirty_page(vm_page_t m
)
4383 vm_object_t object
= m
->object
;
4384 struct vm_pageout_queue
*q
;
4387 lck_mtx_assert(&vm_page_queue_lock
, LCK_MTX_ASSERT_OWNED
);
4389 vm_object_lock_assert_exclusive(object
);
4392 * protect the object from collapse -
4393 * locking in the object's paging_offset.
4395 vm_object_paging_begin(object
);
4397 m
->list_req_pending
= TRUE
;
4401 if (object
->internal
== TRUE
)
4402 q
= &vm_pageout_queue_internal
;
4404 q
= &vm_pageout_queue_external
;
4407 * pgo_laundry count is tied to the laundry bit
4412 m
->pageout_queue
= TRUE
;
4413 queue_enter(&q
->pgo_pending
, m
, vm_page_t
, pageq
);
4415 if (q
->pgo_idle
== TRUE
) {
4416 q
->pgo_idle
= FALSE
;
4417 thread_wakeup((event_t
) &q
->pgo_pending
);
4422 hibernate_flush_queue(queue_head_t
*q
, int qcount
)
4425 vm_object_t l_object
= NULL
;
4426 vm_object_t m_object
= NULL
;
4427 int refmod_state
= 0;
4428 int try_failed_count
= 0;
4430 int current_run
= 0;
4431 struct vm_pageout_queue
*iq
;
4432 struct vm_pageout_queue
*eq
;
4433 struct vm_pageout_queue
*tq
;
4436 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 4) | DBG_FUNC_START
, q
, qcount
, 0, 0, 0);
4438 iq
= &vm_pageout_queue_internal
;
4439 eq
= &vm_pageout_queue_external
;
4441 vm_page_lock_queues();
4443 while (qcount
&& !queue_empty(q
)) {
4445 if (current_run
++ == 1000) {
4446 if (hibernate_should_abort()) {
4453 m
= (vm_page_t
) queue_first(q
);
4454 m_object
= m
->object
;
4457 * check to see if we currently are working
4458 * with the same object... if so, we've
4459 * already got the lock
4461 if (m_object
!= l_object
) {
4463 * the object associated with candidate page is
4464 * different from the one we were just working
4465 * with... dump the lock if we still own it
4467 if (l_object
!= NULL
) {
4468 vm_object_unlock(l_object
);
4472 * Try to lock object; since we've alread got the
4473 * page queues lock, we can only 'try' for this one.
4474 * if the 'try' fails, we need to do a mutex_pause
4475 * to allow the owner of the object lock a chance to
4478 if ( !vm_object_lock_try_scan(m_object
)) {
4480 if (try_failed_count
> 20) {
4481 hibernate_stats
.hibernate_queue_nolock
++;
4483 goto reenter_pg_on_q
;
4485 vm_pageout_scan_wants_object
= m_object
;
4487 vm_page_unlock_queues();
4488 mutex_pause(try_failed_count
++);
4489 vm_page_lock_queues();
4491 hibernate_stats
.hibernate_queue_paused
++;
4494 l_object
= m_object
;
4495 vm_pageout_scan_wants_object
= VM_OBJECT_NULL
;
4498 if ( !m_object
->alive
|| m
->encrypted_cleaning
|| m
->cleaning
|| m
->busy
|| m
->absent
|| m
->error
) {
4500 * page is not to be cleaned
4501 * put it back on the head of its queue
4504 hibernate_stats
.hibernate_skipped_cleaning
++;
4506 hibernate_stats
.hibernate_skipped_transient
++;
4508 goto reenter_pg_on_q
;
4510 if ( !m_object
->pager_initialized
&& m_object
->pager_created
)
4511 goto reenter_pg_on_q
;
4513 if (m_object
->copy
== VM_OBJECT_NULL
) {
4514 if (m_object
->purgable
== VM_PURGABLE_VOLATILE
|| m_object
->purgable
== VM_PURGABLE_EMPTY
) {
4516 * let the normal hibernate image path
4519 goto reenter_pg_on_q
;
4522 if ( !m
->dirty
&& m
->pmapped
) {
4523 refmod_state
= pmap_get_refmod(m
->phys_page
);
4525 if ((refmod_state
& VM_MEM_MODIFIED
))
4532 * page is not to be cleaned
4533 * put it back on the head of its queue
4536 hibernate_stats
.hibernate_skipped_precious
++;
4538 goto reenter_pg_on_q
;
4542 if (m_object
->internal
) {
4543 if (VM_PAGE_Q_THROTTLED(iq
))
4545 } else if (VM_PAGE_Q_THROTTLED(eq
))
4549 wait_result_t wait_result
;
4552 if (l_object
!= NULL
) {
4553 vm_object_unlock(l_object
);
4556 vm_pageout_scan_wants_object
= VM_OBJECT_NULL
;
4558 tq
->pgo_throttled
= TRUE
;
4560 while (retval
== 0) {
4562 assert_wait_timeout((event_t
) &tq
->pgo_laundry
, THREAD_INTERRUPTIBLE
, 1000, 1000*NSEC_PER_USEC
);
4564 vm_page_unlock_queues();
4566 wait_result
= thread_block(THREAD_CONTINUE_NULL
);
4568 vm_page_lock_queues();
4570 if (hibernate_should_abort())
4573 if (wait_result
!= THREAD_TIMED_OUT
)
4576 if (--wait_count
== 0) {
4577 hibernate_stats
.hibernate_throttle_timeout
++;
4584 hibernate_stats
.hibernate_throttled
++;
4588 VM_PAGE_QUEUES_REMOVE(m
);
4590 hibernate_dirty_page(m
);
4592 hibernate_stats
.hibernate_found_dirty
++;
4597 queue_remove(q
, m
, vm_page_t
, pageq
);
4598 queue_enter(q
, m
, vm_page_t
, pageq
);
4600 hibernate_stats
.hibernate_reentered_on_q
++;
4602 hibernate_stats
.hibernate_considered
++;
4605 try_failed_count
= 0;
4607 if (l_object
!= NULL
) {
4608 vm_object_unlock(l_object
);
4611 vm_pageout_scan_wants_object
= VM_OBJECT_NULL
;
4613 vm_page_unlock_queues();
4615 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE
, 4) | DBG_FUNC_END
, hibernate_stats
.hibernate_found_dirty
, retval
, 0, 0, 0);
4622 hibernate_flush_dirty_pages()
4624 struct vm_speculative_age_q
*aq
;
4627 bzero(&hibernate_stats
, sizeof(struct hibernate_statistics
));
4629 if (vm_page_local_q
) {
4630 for (i
= 0; i
< vm_page_local_q_count
; i
++)
4631 vm_page_reactivate_local(i
, TRUE
, FALSE
);
4634 for (i
= 0; i
<= VM_PAGE_MAX_SPECULATIVE_AGE_Q
; i
++) {
4638 aq
= &vm_page_queue_speculative
[i
];
4640 if (queue_empty(&aq
->age_q
))
4644 vm_page_lockspin_queues();
4646 queue_iterate(&aq
->age_q
,
4653 vm_page_unlock_queues();
4656 if (hibernate_flush_queue(&aq
->age_q
, qcount
))
4660 if (hibernate_flush_queue(&vm_page_queue_active
, vm_page_active_count
))
4662 if (hibernate_flush_queue(&vm_page_queue_inactive
, vm_page_inactive_count
- vm_zf_queue_count
))
4664 if (hibernate_flush_queue(&vm_page_queue_zf
, vm_zf_queue_count
))
4667 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal
))
4669 return (hibernate_drain_pageout_queue(&vm_pageout_queue_external
));
extern void IOSleep(unsigned int);
extern int sync_internal(void);

int
hibernate_flush_memory()
{
    int retval;

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);

    if ((retval = hibernate_flush_dirty_pages()) == 0) {
        if (consider_buffer_cache_collect != NULL) {
            KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);

            sync_internal();

            (void)(*consider_buffer_cache_collect)(1);
            consider_zone_gc(1);

            KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
        }
    }
    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);

    HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
             hibernate_stats.hibernate_considered,
             hibernate_stats.hibernate_reentered_on_q,
             hibernate_stats.hibernate_found_dirty);
    HIBPRINT("   skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
             hibernate_stats.hibernate_skipped_cleaning,
             hibernate_stats.hibernate_skipped_transient,
             hibernate_stats.hibernate_skipped_precious,
             hibernate_stats.hibernate_queue_nolock);
    HIBPRINT("   queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
             hibernate_stats.hibernate_queue_paused,
             hibernate_stats.hibernate_throttled,
             hibernate_stats.hibernate_throttle_timeout,
             hibernate_stats.hibernate_drained,
             hibernate_stats.hibernate_drain_timeout);

    return (retval);
}

void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
    uint32_t             bank;
    hibernate_bitmap_t * bitmap;

    bitmap = &list->bank_bitmap[0];
    for (bank = 0; bank < list->bank_count; bank++)
    {
        uint32_t last_bit;

        bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
        // set out-of-bound bits at end of bitmap.
        last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
        if (last_bit)
            bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

        bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
    }
}

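/*
 * Illustrative sketch (added commentary, not original code): with the
 * hibernate_bitmap_t layout zeroed above, a physical page number maps to a
 * word and a big-endian bit within its bank as shown below.  The helper name
 * is hypothetical and it assumes 'page' lies in [first_page, last_page]; the
 * real manipulation is done by the hibernate bitset/bittst routines.
 */
#if 0   /* example only */
static uint32_t
hibernate_bit_for_page_example(hibernate_bitmap_t * bitmap, uint32_t page)
{
    uint32_t offset = page - bitmap->first_page;    /* index within this bank */

    /* bit 0x80000000 of word 0 corresponds to first_page */
    return (bitmap->bitmap[offset >> 5] & (0x80000000 >> (offset & 31)));
}
#endif
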
void
hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
{
    uint32_t  i;
    vm_page_t m;
    uint64_t  start, end, timeout, nsec;

    clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
    clock_get_uptime(&start);

    for (i = 0; i < gobble_count; i++)
    {
        while (VM_PAGE_NULL == (m = vm_page_grab()))
        {
            clock_get_uptime(&end);
            if (end >= timeout)
                break;
            VM_PAGE_WAIT();
        }
        if (!m)
            break;
        m->busy = FALSE;
        vm_page_gobble(m);

        m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
        hibernate_gobble_queue = m;
    }

    clock_get_uptime(&end);
    absolutetime_to_nanoseconds(end - start, &nsec);
    HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
}

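/*
 * Usage note (added commentary): hibernate_gobble_pages() above and
 * hibernate_free_gobble_pages() below are intended to be used as a pair.
 * The hibernation code grabs a batch of free pages up front (they are then
 * marked as not needing to be saved in hibernate_page_list_setall()) and
 * returns them to the free list afterwards, for example:
 *
 *      hibernate_gobble_pages(page_count, free_page_time);
 *      ...
 *      hibernate_free_gobble_pages();
 *
 * The argument names here are placeholders.
 */
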
void
hibernate_free_gobble_pages(void)
{
    vm_page_t m, next;
    uint32_t  count = 0;

    m = (vm_page_t) hibernate_gobble_queue;
    while (m)
    {
        next = (vm_page_t) m->pageq.next;
        vm_page_free(m);
        count++;
        m = next;
    }
    hibernate_gobble_queue = VM_PAGE_NULL;

    if (count)
        HIBLOG("Freed %d pages\n", count);
}

static boolean_t
hibernate_consider_discard(vm_page_t m)
{
    vm_object_t object = NULL;
    int         refmod_state;
    boolean_t   discard = FALSE;

    do
    {
        if (m->private)
            panic("hibernate_consider_discard: private");

        if (!vm_object_lock_try(m->object)) {
            hibernate_stats.cd_lock_failed++;
            break;
        }
        object = m->object;

        if (VM_PAGE_WIRED(m)) {
            hibernate_stats.cd_found_wired++;
            break;
        }
        if (m->precious) {
            hibernate_stats.cd_found_precious++;
            break;
        }
        if (m->busy || !object->alive) {
            /*
             * Somebody is playing with this page.
             */
            hibernate_stats.cd_found_busy++;
            break;
        }
        if (m->absent || m->unusual || m->error) {
            /*
             * If it's unusual in anyway, ignore it
             */
            hibernate_stats.cd_found_unusual++;
            break;
        }
        if (m->cleaning) {
            hibernate_stats.cd_found_cleaning++;
            break;
        }
        if (m->laundry || m->list_req_pending) {
            hibernate_stats.cd_found_laundry++;
            break;
        }
        if (!m->dirty)
        {
            refmod_state = pmap_get_refmod(m->phys_page);

            if (refmod_state & VM_MEM_REFERENCED)
                m->reference = TRUE;
            if (refmod_state & VM_MEM_MODIFIED)
                m->dirty = TRUE;
        }

        /*
         * If it's clean or purgeable we can discard the page on wakeup.
         */
        discard = (!m->dirty)
                    || (VM_PURGABLE_VOLATILE == object->purgable)
                    || (VM_PURGABLE_EMPTY == object->purgable);

        if (discard == FALSE)
            hibernate_stats.cd_found_dirty++;
    }
    while (FALSE);

    if (object)
        vm_object_unlock(object);

    return (discard);
}

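/*
 * Summary (added commentary): a page is reported as discardable only when
 * nothing else is manipulating it (not wired, busy, precious, unusual, being
 * cleaned or laundered) and it is either clean or belongs to a volatile or
 * empty purgeable object, i.e. its contents need not appear in the image and
 * can be dropped or reconstructed after wake.
 */
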
static void
hibernate_discard_page(vm_page_t m)
{
    if (m->absent || m->unusual || m->error)
        /*
         * If it's unusual in anyway, ignore
         */
        return;

    if (m->pmapped == TRUE)
    {
        __unused int refmod_state = pmap_disconnect(m->phys_page);
    }

    if (m->laundry)
        panic("hibernate_discard_page(%p) laundry", m);
    if (m->private)
        panic("hibernate_discard_page(%p) private", m);
    if (m->fictitious)
        panic("hibernate_discard_page(%p) fictitious", m);

    if (VM_PURGABLE_VOLATILE == m->object->purgable)
    {
        /* object should be on a queue */
        assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
        purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
        assert(old_queue);
        /* No need to lock page queue for token delete, hibernate_vm_unlock()
           makes sure these locks are uncontended before sleep */
        vm_purgeable_token_delete_first(old_queue);
        m->object->purgable = VM_PURGABLE_EMPTY;
    }

    vm_page_free(m);
}

/*
 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
 pages known to VM to not need saving are subtracted.
 Wired pages to be saved are present in page_list_wired, pageable in page_list.
*/

void
hibernate_page_list_setall(hibernate_page_list_t * page_list,
                           hibernate_page_list_t * page_list_wired,
                           uint32_t * pagesOut)
{
    uint64_t  start, end, nsec;
    vm_page_t m;
    uint32_t  pages = page_list->page_count;
    uint32_t  count_zf = 0, count_throttled = 0;
    uint32_t  count_inactive = 0, count_active = 0, count_speculative = 0;
    uint32_t  count_wire = pages;
    uint32_t  count_discard_active    = 0;
    uint32_t  count_discard_inactive  = 0;
    uint32_t  count_discard_purgeable = 0;
    uint32_t  count_discard_speculative = 0;
    uint32_t  i;
    uint32_t             bank;
    hibernate_bitmap_t * bitmap;
    hibernate_bitmap_t * bitmap_wired;

    HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);

    clock_get_uptime(&start);

    hibernate_page_list_zero(page_list);
    hibernate_page_list_zero(page_list_wired);

    hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
    hibernate_stats.cd_pages = pages;

    if (vm_page_local_q) {
        for (i = 0; i < vm_page_local_q_count; i++)
            vm_page_reactivate_local(i, TRUE, TRUE);
    }

    m = (vm_page_t) hibernate_gobble_queue;
    while (m)
    {
        pages--;
        count_wire--;
        hibernate_page_bitset(page_list,       TRUE, m->phys_page);
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
        m = (vm_page_t) m->pageq.next;
    }

    for( i = 0; i < real_ncpus; i++ )
    {
        if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
        {
            for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
            {
                pages--;
                count_wire--;
                hibernate_page_bitset(page_list,       TRUE, m->phys_page);
                hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

                hibernate_stats.cd_local_free++;
                hibernate_stats.cd_total_free++;
            }
        }
    }

    for( i = 0; i < vm_colors; i++ )
    {
        queue_iterate(&vm_page_queue_free[i],
                      m,
                      vm_page_t,
                      pageq)
        {
            pages--;
            count_wire--;
            hibernate_page_bitset(page_list,       TRUE, m->phys_page);
            hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

            hibernate_stats.cd_total_free++;
        }
    }

    queue_iterate(&vm_lopage_queue_free,
                  m,
                  vm_page_t,
                  pageq)
    {
        pages--;
        count_wire--;
        hibernate_page_bitset(page_list,       TRUE, m->phys_page);
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);

        hibernate_stats.cd_total_free++;
    }

    queue_iterate( &vm_page_queue_throttled,
                   m,
                   vm_page_t,
                   pageq )
    {
        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
         && hibernate_consider_discard(m))
        {
            hibernate_page_bitset(page_list, TRUE, m->phys_page);
            count_discard_inactive++;
        }
        else
            count_throttled++;
        count_wire--;
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
    }

    queue_iterate( &vm_page_queue_zf,
                   m,
                   vm_page_t,
                   pageq )
    {
        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
         && hibernate_consider_discard(m))
        {
            hibernate_page_bitset(page_list, TRUE, m->phys_page);
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_inactive++;
        }
        else
            count_zf++;
        count_wire--;
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
    }

    queue_iterate( &vm_page_queue_inactive,
                   m,
                   vm_page_t,
                   pageq )
    {
        if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
         && hibernate_consider_discard(m))
        {
            hibernate_page_bitset(page_list, TRUE, m->phys_page);
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_inactive++;
        }
        else
            count_inactive++;
        count_wire--;
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
    }

    for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
    {
        queue_iterate(&vm_page_queue_speculative[i].age_q,
                      m,
                      vm_page_t,
                      pageq)
        {
            if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
             && hibernate_consider_discard(m))
            {
                hibernate_page_bitset(page_list, TRUE, m->phys_page);
                count_discard_speculative++;
            }
            else
                count_speculative++;
            count_wire--;
            hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
        }
    }

    queue_iterate( &vm_page_queue_active,
                   m,
                   vm_page_t,
                   pageq )
    {
        if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
         && hibernate_consider_discard(m))
        {
            hibernate_page_bitset(page_list, TRUE, m->phys_page);
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_active++;
        }
        else
            count_active++;
        count_wire--;
        hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
    }

    // pull wired from hibernate_bitmap

    bitmap = &page_list->bank_bitmap[0];
    bitmap_wired = &page_list_wired->bank_bitmap[0];
    for (bank = 0; bank < page_list->bank_count; bank++)
    {
        for (i = 0; i < bitmap->bitmapwords; i++)
            bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
        bitmap       = (hibernate_bitmap_t *) &bitmap->bitmap      [bitmap->bitmapwords];
        bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
    }

    // machine dependent adjustments
    hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);

    hibernate_stats.cd_count_wire = count_wire;
    hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative;

    clock_get_uptime(&end);
    absolutetime_to_nanoseconds(end - start, &nsec);
    HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);

    HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
           pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
           count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);

    *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;

    KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
}

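/*
 * Added commentary on the "pull wired from hibernate_bitmap" merge above:
 * every page starts with a zero bit (save) in both lists.  The free-list
 * walks set the bit in both lists, and the queue scans set the bit in
 * page_list_wired for every pageable page they visit, so the pages whose bit
 * is still zero in page_list_wired are the ones never visited, i.e. the
 * wired pages.  OR-ing ~bitmap_wired into page_list therefore sets, and so
 * excludes, those pages in the pageable list, leaving each page marked for
 * saving in at most one of the two images.  For a single 32-bit word, for
 * example:
 *
 *      bitmap_wired->bitmap[i] == 0xFFFF0000   (16 zero bits: wired pages)
 *      bitmap->bitmap[i]       |= 0x0000FFFF   (same 16 pages leave page_list)
 */
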
void
hibernate_page_list_discard(hibernate_page_list_t * page_list)
{
    uint64_t  start, end, nsec;
    vm_page_t m;
    vm_page_t next;
    uint32_t  i;
    uint32_t  count_discard_active    = 0;
    uint32_t  count_discard_inactive  = 0;
    uint32_t  count_discard_purgeable = 0;
    uint32_t  count_discard_speculative = 0;

    clock_get_uptime(&start);

    m = (vm_page_t) queue_first(&vm_page_queue_zf);
    while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        if (hibernate_page_bittst(page_list, m->phys_page))
        {
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_inactive++;
            hibernate_discard_page(m);
        }
        m = next;
    }

    for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
    {
        m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
        while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
        {
            next = (vm_page_t) m->pageq.next;
            if (hibernate_page_bittst(page_list, m->phys_page))
            {
                count_discard_speculative++;
                hibernate_discard_page(m);
            }
            m = next;
        }
    }

    m = (vm_page_t) queue_first(&vm_page_queue_inactive);
    while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        if (hibernate_page_bittst(page_list, m->phys_page))
        {
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_inactive++;
            hibernate_discard_page(m);
        }
        m = next;
    }

    m = (vm_page_t) queue_first(&vm_page_queue_active);
    while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
    {
        next = (vm_page_t) m->pageq.next;
        if (hibernate_page_bittst(page_list, m->phys_page))
        {
            if (m->dirty)
                count_discard_purgeable++;
            else
                count_discard_active++;
            hibernate_discard_page(m);
        }
        m = next;
    }

    clock_get_uptime(&end);
    absolutetime_to_nanoseconds(end - start, &nsec);
    HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
           nsec / 1000000ULL,
           count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
}

#endif /* HIBERNATION */

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <mach_vm_debug.h>
#if MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *	Routine:	vm_page_info
 *	Purpose:
 *		Return information about the global VP table.
 *		Fills the buffer with as much information as possible
 *		and returns the desired size of the buffer.
 *	Conditions:
 *		Nothing locked.  The caller should provide
 *		possibly-pageable memory.
 */

unsigned int
vm_page_info(
    hash_info_bucket_t *info,
    unsigned int count)
{
    unsigned int i;
    lck_spin_t *bucket_lock;

    if (vm_page_bucket_count < count)
        count = vm_page_bucket_count;

    for (i = 0; i < count; i++) {
        vm_page_bucket_t *bucket = &vm_page_buckets[i];
        unsigned int bucket_count = 0;
        vm_page_t m;

        bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
        lck_spin_lock(bucket_lock);

        for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
            bucket_count++;

        lck_spin_unlock(bucket_lock);

        /* don't touch pageable memory while holding locks */
        info[i].hib_count = bucket_count;
    }

    return vm_page_bucket_count;
}
#endif /* MACH_VM_DEBUG */

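/*
 * Usage note (added commentary) on vm_page_info() above: it copies out at
 * most 'count' bucket entries but always returns vm_page_bucket_count, so a
 * caller whose buffer turned out to be too small can reallocate it to the
 * returned size and call again.
 */
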
#include <mach_kdb.h>
#if MACH_KDB

#include <ddb/db_output.h>
#include <vm/vm_print.h>
#define printf kdbprintf

/*
 *	Routine:	vm_page_print [exported]
 */
void
vm_page_print(
    db_addr_t db_addr)
{
    vm_page_t p;

    p = (vm_page_t) (long) db_addr;

    iprintf("page 0x%x\n", p);

    iprintf("object=0x%x", p->object);
    printf(", offset=0x%x", p->offset);
    printf(", wire_count=%d", p->wire_count);

    iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
            (p->local ? "" : "!"),
            (p->inactive ? "" : "!"),
            (p->active ? "" : "!"),
            (p->throttled ? "" : "!"),
            (p->gobbled ? "" : "!"),
            (p->laundry ? "" : "!"),
            (p->free ? "" : "!"),
            (p->reference ? "" : "!"),
            (p->encrypted ? "" : "!"));
    iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
            (p->busy ? "" : "!"),
            (p->wanted ? "" : "!"),
            (p->tabled ? "" : "!"),
            (p->fictitious ? "" : "!"),
            (p->private ? "" : "!"),
            (p->precious ? "" : "!"));
    iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
            (p->absent ? "" : "!"),
            (p->error ? "" : "!"),
            (p->dirty ? "" : "!"),
            (p->cleaning ? "" : "!"),
            (p->pageout ? "" : "!"),
            (p->clustered ? "" : "!"));
    iprintf("%soverwriting, %srestart, %sunusual\n",
            (p->overwriting ? "" : "!"),
            (p->restart ? "" : "!"),
            (p->unusual ? "" : "!"));

    iprintf("phys_page=0x%x", p->phys_page);
    printf("\n");
}
#endif /* MACH_KDB */