2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
34 * All Rights Reserved.
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
46 * Carnegie Mellon requests users of this software to return to
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
63 * Resident memory system definitions.
66 #ifndef _VM_VM_PAGE_H_
67 #define _VM_VM_PAGE_H_
71 #include <mach/boolean.h>
72 #include <mach/vm_prot.h>
73 #include <mach/vm_param.h>
74 #include <vm/vm_object.h>
75 #include <kern/queue.h>
76 #include <kern/lock.h>
78 #include <kern/macro_help.h>
79 #include <libkern/OSAtomic.h>
83 * VM_PAGE_MIN_SPECULATIVE_AGE_Q through VM_PAGE_MAX_SPECULATIVE_AGE_Q
84 * represents a set of aging bins that are 'protected'...
86 * VM_PAGE_SPECULATIVE_AGED_Q is a list of the speculative pages that have
87 * not yet been 'claimed' but have been aged out of the protective bins
88 * this occurs in vm_page_speculate when it advances to the next bin
89 * and discovers that it is still occupied... at that point, all of the
90 * pages in that bin are moved to the VM_PAGE_SPECULATIVE_AGED_Q. the pages
91 * in that bin are all guaranteed to have reached at least the maximum age
92 * we allow for a protected page... they can be older if there is no
93 * memory pressure to pull them from the bin, or there are no new speculative pages
94 * being generated to push them out.
95 * this list is the one that vm_pageout_scan will prefer when looking
96 * for pages to move to the underweight free list
98 * VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS
99 * defines the amount of time a speculative page is normally
100 * allowed to live in the 'protected' state (i.e. not available
101 * to be stolen if vm_pageout_scan is running and looking for
102 * pages)... however, if the total number of speculative pages
103 * in the protected state exceeds our limit (defined in vm_pageout.c)
104 * and there are none available in VM_PAGE_SPECULATIVE_AGED_Q, then
105 * vm_pageout_scan is allowed to steal pages from the protected
106 * bucket even if they are underage.
108 * vm_pageout_scan is also allowed to pull pages from a protected
109 * bin if the bin has reached the "age of consent" we've set
/*
 * Speculative page aging bins.
 *
 * Bins VM_PAGE_MIN_SPECULATIVE_AGE_Q through VM_PAGE_MAX_SPECULATIVE_AGE_Q
 * are the 'protected' aging bins; VM_PAGE_SPECULATIVE_AGED_Q is the list of
 * speculative pages that have aged out of the protected bins.
 */
#define VM_PAGE_MAX_SPECULATIVE_AGE_Q	10
#define VM_PAGE_MIN_SPECULATIVE_AGE_Q	1
#define VM_PAGE_SPECULATIVE_AGED_Q	0

/*
 * VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS is the time a
 * speculative page is normally allowed to stay in the protected state
 * (presumably milliseconds, going by the _MS suffix -- confirm).
 */
#define VM_PAGE_SPECULATIVE_Q_AGE_MS	500
118 struct vm_speculative_age_q
{
120 * memory queue for speculative pages via clustered pageins
123 mach_timespec_t age_ts
;
128 struct vm_speculative_age_q vm_page_queue_speculative
[];
130 extern int speculative_steal_index
;
131 extern int speculative_age_index
;
135 * Management of resident (logical) pages.
137 * A small structure is kept for each resident
138 * page, indexed by page number. Each structure
139 * is an element of several lists:
141 * A hash table bucket used to quickly
142 * perform object/offset lookups
144 * A list of all pages for a given object,
145 * so they can be quickly deactivated at
146 * time of deallocation.
148 * An ordered list of pages due for pageout.
150 * In addition, the structure contains the object
151 * and offset to which this page belongs (for pageout),
152 * and sundry status bits.
154 * Fields in this structure are locked either by the lock on the
155 * object that the page belongs to (O) or by the lock on the page
156 * queues (P). [Some fields require that both locks be held to
157 * change that field; holding either lock is sufficient to read.]
161 queue_chain_t pageq
; /* queue info for FIFO
162 * queue or free list (P) */
163 queue_chain_t listq
; /* all pages in same object (O) */
164 struct vm_page
*next
; /* VP bucket link (O) */
166 vm_object_t object
; /* which object am I in (O&P) */
167 vm_object_offset_t offset
; /* offset into that object (O,P) */
170 * The following word of flags is protected
171 * by the "page queues" lock.
173 unsigned int wire_count
:16, /* how many wired down maps use me? (O&P) */
174 /* boolean_t */ inactive
:1, /* page is in inactive list (P) */
175 active
:1, /* page is in active list (P) */
176 pageout_queue
:1,/* page is on queue for pageout (P) */
177 speculative
:1, /* page is on speculative list (P) */
178 laundry
:1, /* page is being cleaned now (P)*/
179 free
:1, /* page is on free list (P) */
180 reference
:1, /* page has been used (P) */
181 pageout
:1, /* page wired & busy for pageout (P) */
182 gobbled
:1, /* page used internally (P) */
183 private:1, /* Page should not be returned to
184 * the free list (P) */
185 throttled
:1, /* pager is not responding (P) */
186 __unused_pageq_bits
:5; /* 5 bits available here */
189 * The following word of flags is protected
190 * by the "VM object" lock.
193 /* boolean_t */ busy
:1, /* page is in transit (O) */
194 wanted
:1, /* someone is waiting for page (O) */
195 tabled
:1, /* page is in VP table (O) */
196 fictitious
:1, /* Physical page doesn't exist (O) */
197 pmapped
:1, /* page has been entered at some
198 * point into a pmap (O) */
199 wpmapped
:1, /* page has been entered at some
200 * point into a pmap for write (O) */
201 absent
:1, /* Data has been requested, but is
202 * not yet available (O) */
203 error
:1, /* Data manager was unable to provide
204 * data due to error (O) */
205 dirty
:1, /* Page must be cleaned (O) */
206 cleaning
:1, /* Page clean has begun (O) */
207 precious
:1, /* Page is precious; data must be
208 * returned even if clean (O) */
209 clustered
:1, /* page is not the faulted page (O) */
210 overwriting
:1, /* Request to unlock has been made
211 * without having data. (O)
212 * [See vm_fault_page_overwrite] */
213 restart
:1, /* Page was pushed higher in shadow
214 chain by copy_call-related pagers;
215 start again at top of chain */
216 unusual
:1, /* Page is absent, error, restart or
218 encrypted
:1, /* encrypted for secure swap (O) */
219 encrypted_cleaning
:1, /* encrypting page */
220 list_req_pending
:1, /* pagein/pageout alt mechanism */
221 /* allows creation of list */
222 /* requests on pages that are */
223 /* actively being paged. */
224 dump_cleaning
:1, /* set by the pageout daemon when */
225 /* a page being cleaned is */
226 /* encountered and targeted as */
227 /* a pageout candidate */
228 cs_validated
:1, /* code-signing: page was checked */
229 cs_tainted
:1, /* code-signing: page is tainted */
230 no_cache
:1, /* page is not to be cached and */
231 /* should be reused ahead of */
235 __unused_object_bits
:8; /* 8 bits available here */
237 ppnum_t phys_page
; /* Physical address of page, passed
238 * to pmap_enter (read-only) */
/*
 * ASSERT_PAGE_DECRYPTED(page)
 *
 * Checks that (page)->encrypted is clear.  With DEBUG_ENCRYPTED_SWAP set
 * (the default here) a violation calls panic() with the page pointer;
 * otherwise the check is a plain assert().
 *
 * The panic variant is wrapped in do { } while (0) so that it expands to a
 * single statement and is safe in an unbraced if/else.
 *
 * NOTE(review): the panic() argument list was cut off after the format
 * string in the reviewed text; (page) is supplied for the single %p --
 * confirm against the upstream header.
 */
#define DEBUG_ENCRYPTED_SWAP	1
#if DEBUG_ENCRYPTED_SWAP
#define ASSERT_PAGE_DECRYPTED(page)					\
	do {								\
		if ((page)->encrypted) {				\
			panic("VM page %p should not be encrypted here\n", \
			      (page));					\
		}							\
	} while (0)
#else	/* DEBUG_ENCRYPTED_SWAP */
#define ASSERT_PAGE_DECRYPTED(page)	assert(!(page)->encrypted)
#endif	/* DEBUG_ENCRYPTED_SWAP */
/* Pointer to a resident page structure. */
typedef struct vm_page	*vm_page_t;

/* Null page pointer. */
#define VM_PAGE_NULL		((vm_page_t) 0)

/* The pageq.next link of m, viewed as a vm_page_t. */
#define NEXT_PAGE(m)		((vm_page_t) (m)->pageq.next)

/* Address of m's pageq.next link, viewed as a vm_page_t *. */
#define NEXT_PAGE_PTR(m)	((vm_page_t *) &(m)->pageq.next)
261 * XXX The unusual bit should not be necessary. Most of the bit
262 * XXX fields above really want to be masks.
/*
 *	For debugging, this macro can be defined to perform
 *	some useful check on a page structure.
 *
 *	As defined here it expands to an empty statement and does not
 *	evaluate its argument.
 */
#define VM_PAGE_CHECK(mem)	do {} while (0)
/*
 *	The free page list is actually n lists, one per color,
 *	where the number of colors is a function of the machine's
 *	cache geometry set at system initialization.  To disable
 *	coloring, set vm_colors to 1 and vm_color_mask to 0.
 *	The boot-arg "colors" may be used to override vm_colors.
 *	Note that there is little harm in having more colors than needed.
 */
#define MAX_COLORS	128
#define DEFAULT_COLORS	32

/*
 * Declared extern (like the other globals in this header) so that each
 * includer gets a declaration rather than a tentative definition.
 */
extern unsigned int	vm_colors;		/* must be in range 1..MAX_COLORS */
extern unsigned int	vm_color_mask;		/* must be (vm_colors-1) */
extern unsigned int	vm_cache_geometry_colors; /* optimal #colors based on cache geometry */
293 * Each pageable resident page falls into one of three lists:
296 * Available for allocation now. The free list is
297 * actually an array of lists, one per color.
299 * Not referenced in any map, but still has an
300 * object/offset-page mapping, and may be dirty.
301 * This is the list of pages that should be
302 * paged out next. There are actually two
303 * inactive lists, one for pages brought in from
304 * disk or other backing store, and another
305 * for "zero-filled" pages. See vm_pageout_scan()
306 * for the distinction and usage.
308 * A list of pages which have been placed in
309 * at least one physical map. This list is
310 * ordered, in LRU-like fashion.
314 queue_head_t vm_page_queue_free
[MAX_COLORS
]; /* memory free queue */
316 queue_head_t vm_lopage_queue_free
; /* low memory free queue */
318 vm_page_t vm_page_queue_fictitious
; /* fictitious free queue */
320 queue_head_t vm_page_queue_active
; /* active memory queue */
322 queue_head_t vm_page_queue_inactive
; /* inactive memory queue for normal pages */
324 queue_head_t vm_page_queue_zf
; /* inactive memory queue for zero fill */
325 queue_head_t vm_page_queue_throttled
; /* memory queue for throttled pageout pages */
328 vm_offset_t first_phys_addr
; /* physical address for first_page */
330 vm_offset_t last_phys_addr
; /* physical address for last_page */
333 unsigned int vm_page_free_count
; /* How many pages are free? (sum of all colors) */
335 unsigned int vm_page_fictitious_count
;/* How many fictitious pages are free? */
337 unsigned int vm_page_active_count
; /* How many pages are active? */
339 unsigned int vm_page_inactive_count
; /* How many pages are inactive? */
341 unsigned int vm_page_throttled_count
;/* How many inactives are throttled */
343 unsigned int vm_page_speculative_count
; /* How many speculative pages are unclaimed? */
345 unsigned int vm_page_wire_count
; /* How many pages are wired? */
347 vm_map_size_t vm_user_wire_limit
; /* How much memory can be locked by a user? */
349 vm_map_size_t vm_global_user_wire_limit
; /* How much memory can be locked system wide by users? */
351 unsigned int vm_page_free_target
; /* How many do we want free? */
353 unsigned int vm_page_free_min
; /* When to wakeup pageout */
355 unsigned int vm_page_inactive_target
;/* How many do we want inactive? */
357 unsigned int vm_page_inactive_min
; /* When do wakeup pageout */
359 unsigned int vm_page_free_reserved
; /* How many pages reserved to do pageout */
361 unsigned int vm_page_zfill_throttle_count
;/* Count of zero-fill allocations throttled */
363 unsigned int vm_page_gobble_count
;
366 unsigned int vm_page_speculative_unused
;
368 unsigned int vm_page_speculative_used
;
370 unsigned int vm_page_purgeable_count
;/* How many pages are purgeable now ? */
372 uint64_t vm_page_purged_count
; /* How many pages got purged so far ? */
374 decl_mutex_data(,vm_page_queue_lock
)
375 /* lock on active and inactive page queues */
376 decl_mutex_data(,vm_page_queue_free_lock
)
377 /* lock on free page queue array (ie, all colors) */
379 extern unsigned int vm_page_free_wanted
;
380 /* how many threads are waiting for memory */
382 extern unsigned int vm_page_free_wanted_privileged
;
383 /* how many VM privileged threads are waiting for memory */
385 extern vm_offset_t vm_page_fictitious_addr
;
386 /* (fake) phys_addr of fictitious pages */
388 extern vm_offset_t vm_page_guard_addr
;
389 /* (fake) phys_addr of guard pages */
392 extern boolean_t vm_page_deactivate_hint
;
394 // 0 = all pages avail, 1 = disable high mem, 2 = prefer himem
395 extern int vm_himemory_mode
;
397 extern ppnum_t vm_lopage_poolend
;
398 extern int vm_lopage_poolsize
;
399 extern uint64_t max_valid_dma_address
;
403 * Prototypes for functions exported by this module.
405 extern void vm_page_bootstrap(
407 vm_offset_t
*endp
) __attribute__((section("__TEXT, initcode")));
409 extern void vm_page_module_init(void) __attribute__((section("__TEXT, initcode")));
411 extern void vm_page_create(
415 extern vm_page_t
vm_page_lookup(
417 vm_object_offset_t offset
);
419 extern vm_page_t
vm_page_grab_fictitious(void);
421 extern vm_page_t
vm_page_grab_guard(void);
423 extern void vm_page_release_fictitious(
426 extern void vm_page_more_fictitious(void);
428 extern int vm_pool_low(void);
430 extern vm_page_t
vm_page_grab(void);
432 extern vm_page_t
vm_page_grablo(void);
434 extern void vm_page_release(
437 extern boolean_t
vm_page_wait(
440 extern vm_page_t
vm_page_alloc(
442 vm_object_offset_t offset
);
444 extern vm_page_t
vm_page_alloclo(
446 vm_object_offset_t offset
);
448 extern vm_page_t
vm_page_alloc_guard(
450 vm_object_offset_t offset
);
452 extern void vm_page_init(
456 extern void vm_page_free(
459 extern void vm_page_free_prepare(
462 extern void vm_page_activate(
465 extern void vm_page_deactivate(
468 extern void vm_page_lru(
471 extern void vm_page_speculate(
475 extern void vm_page_speculate_ageit(
476 struct vm_speculative_age_q
*aq
);
478 extern void vm_page_rename(
480 vm_object_t new_object
,
481 vm_object_offset_t new_offset
,
482 boolean_t encrypted_ok
);
484 extern void vm_page_insert(
487 vm_object_offset_t offset
);
489 extern void vm_page_insert_internal(
492 vm_object_offset_t offset
,
493 boolean_t queues_lock_held
);
495 extern void vm_page_replace(
498 vm_object_offset_t offset
);
500 extern void vm_page_remove(
503 extern void vm_page_zero_fill(
506 extern void vm_page_part_zero_fill(
511 extern void vm_page_copy(
513 vm_page_t dest_page
);
515 extern void vm_page_part_copy(
522 extern void vm_page_wire(
525 extern void vm_page_unwire(
528 extern void vm_set_page_size(void);
530 extern void vm_page_gobble(
533 extern void vm_page_validate_cs(vm_page_t page
);
/*
 *	Functions implemented as macros. m->wanted and m->busy are
 *	protected by the object lock.
 */

/*
 * Set m->wanted, then call assert_wait() with the page pointer as the
 * event.  Evaluates to assert_wait()'s result.
 */
#define PAGE_ASSERT_WAIT(m, interruptible)			\
		(((m)->wanted = TRUE),				\
		 assert_wait((event_t) (m), (interruptible)))

/*
 * Set m->wanted, then call thread_sleep_vm_object() on object o and
 * page m.  Evaluates to that call's result.
 */
#define PAGE_SLEEP(o, m, interruptible)				\
		(((m)->wanted = TRUE),				\
		 thread_sleep_vm_object((o), (m), (interruptible)))
548 #define PAGE_WAKEUP_DONE(m) \
552 (m)->wanted = FALSE; \
553 thread_wakeup((event_t) (m)); \
557 #define PAGE_WAKEUP(m) \
560 (m)->wanted = FALSE; \
561 thread_wakeup((event_t) (m)); \
565 #define VM_PAGE_FREE(p) \
567 vm_page_lock_queues(); \
569 vm_page_unlock_queues(); \
/*
 * VM_PAGE_GRAB_FICTITIOUS(M)
 *
 * Assigns to M the result of vm_page_grab_fictitious(), calling
 * vm_page_more_fictitious() and retrying for as long as the grab returns
 * VM_PAGE_NULL.  M must be an lvalue.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement.
 */
#define VM_PAGE_GRAB_FICTITIOUS(M)					\
	do {								\
		while (((M) = vm_page_grab_fictitious()) == VM_PAGE_NULL) \
			vm_page_more_fictitious();			\
	} while (0)
/*
 * VM_PAGE_ZFILL_THROTTLED()
 *
 * Non-zero when vm_page_free_count is below vm_page_free_min and the
 * current thread does not have TH_OPT_VMPRIV set.  In that case
 * vm_page_zfill_throttle_count is incremented as a side effect.
 *
 * The increment is sequenced with the comma operator so that the result
 * does not depend on the counter's value (it used to read as false on the
 * one call where the counter wrapped around to 0).
 */
#define VM_PAGE_ZFILL_THROTTLED()					\
	(vm_page_free_count < vm_page_free_min &&			\
	 !(current_thread()->options & TH_OPT_VMPRIV) &&		\
	 (++vm_page_zfill_throttle_count, 1))

/* Call vm_page_wait(THREAD_UNINT) and discard its result. */
#define VM_PAGE_WAIT()		((void)vm_page_wait(THREAD_UNINT))

/* Take / release vm_page_queue_lock. */
#define vm_page_lock_queues()	mutex_lock(&vm_page_queue_lock)
#define vm_page_unlock_queues()	mutex_unlock(&vm_page_queue_lock)

/* Take vm_page_queue_lock via mutex_lock_spin(). */
#define vm_page_lockspin_queues()	mutex_lock_spin(&vm_page_queue_lock)
/*
 * VM_PAGE_QUEUES_REMOVE(mem)
 *
 * Takes mem off whichever paging queue its flags say it is on (active,
 * inactive, throttled or speculative), clears that flag, adjusts the
 * matching counter, and finally clears mem->pageq.next/prev.
 *
 *  - active:      removed from vm_page_queue_active.
 *  - inactive:    removed from vm_page_queue_zf when mem->zero_fill is set
 *                 (vm_zf_queue_count is decremented), otherwise from
 *                 vm_page_queue_inactive.
 *  - throttled:   removed from vm_page_queue_throttled.
 *  - speculative: unlinked with remque().
 *
 * For the active, inactive and throttled cases the page counter is only
 * decremented when the page is not fictitious.
 *
 * NOTE(review): the reviewed text had lost the leading
 * "if ((mem)->active)", the inner "else" arms, the closing braces and the
 * statement wrapper.  They are restored here from what the remaining
 * lines imply -- confirm against the upstream header.
 */
#define VM_PAGE_QUEUES_REMOVE(mem)					\
	do {								\
		assert(!(mem)->laundry);				\
		if ((mem)->active) {					\
			assert((mem)->object != kernel_object);		\
			assert(!(mem)->inactive && !(mem)->speculative); \
			assert(!(mem)->throttled);			\
			queue_remove(&vm_page_queue_active,		\
				     (mem), vm_page_t, pageq);		\
			(mem)->active = FALSE;				\
			if (!(mem)->fictitious) {			\
				vm_page_active_count--;			\
			} else {					\
				assert((mem)->phys_page ==		\
				       vm_page_fictitious_addr);	\
			}						\
		}							\
		else if ((mem)->inactive) {				\
			assert((mem)->object != kernel_object);		\
			assert(!(mem)->active && !(mem)->speculative);	\
			assert(!(mem)->throttled);			\
			if ((mem)->zero_fill) {				\
				queue_remove(&vm_page_queue_zf,		\
					     (mem), vm_page_t, pageq);	\
				vm_zf_queue_count--;			\
			} else {					\
				queue_remove(&vm_page_queue_inactive,	\
					     (mem), vm_page_t, pageq);	\
			}						\
			(mem)->inactive = FALSE;			\
			if (!(mem)->fictitious) {			\
				vm_page_inactive_count--;		\
				vm_purgeable_q_advance_all(1);		\
			} else {					\
				assert((mem)->phys_page ==		\
				       vm_page_fictitious_addr);	\
			}						\
		}							\
		else if ((mem)->throttled) {				\
			assert(!(mem)->active && !(mem)->inactive);	\
			assert(!(mem)->speculative);			\
			queue_remove(&vm_page_queue_throttled,		\
				     (mem), vm_page_t, pageq);		\
			(mem)->throttled = FALSE;			\
			if (!(mem)->fictitious)				\
				vm_page_throttled_count--;		\
		}							\
		else if ((mem)->speculative) {				\
			assert(!(mem)->active && !(mem)->inactive);	\
			assert(!(mem)->throttled);			\
			assert(!(mem)->fictitious);			\
			remque(&(mem)->pageq);				\
			(mem)->speculative = FALSE;			\
			vm_page_speculative_count--;			\
		}							\
		(mem)->pageq.next = NULL;				\
		(mem)->pageq.prev = NULL;				\
	} while (0)
/*
 * VM_PAGE_CONSUME_CLUSTERED(mem)
 *
 * If mem->clustered is set: increments mem->object->pages_used, clears
 * the clustered flag and atomically adds 1 to vm_page_speculative_used.
 * Does nothing otherwise.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement;
 * mem is parenthesized so that any pointer expression may be passed.
 */
#define VM_PAGE_CONSUME_CLUSTERED(mem)					\
	do {								\
		if ((mem)->clustered) {					\
			assert((mem)->object);				\
			(mem)->object->pages_used++;			\
			(mem)->clustered = FALSE;			\
			OSAddAtomic(1, (SInt32 *)&vm_page_speculative_used); \
		}							\
	} while (0)
663 #endif /* _VM_VM_PAGE_H_ */