]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_resident.c
xnu-3248.60.10.tar.gz
[apple/xnu.git] / osfmk / vm / vm_resident.c
CommitLineData
1c79356b 1/*
b0d623f7 2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
91447636 65#include <debug.h>
2d21ac55 66#include <libkern/OSAtomic.h>
3e170ce0 67#include <libkern/OSDebug.h>
91447636 68
9bccf70c 69#include <mach/clock_types.h>
1c79356b
A
70#include <mach/vm_prot.h>
71#include <mach/vm_statistics.h>
2d21ac55 72#include <mach/sdt.h>
1c79356b
A
73#include <kern/counters.h>
74#include <kern/sched_prim.h>
75#include <kern/task.h>
76#include <kern/thread.h>
b0d623f7 77#include <kern/kalloc.h>
1c79356b
A
78#include <kern/zalloc.h>
79#include <kern/xpr.h>
fe8ab488 80#include <kern/ledger.h>
1c79356b
A
81#include <vm/pmap.h>
82#include <vm/vm_init.h>
83#include <vm/vm_map.h>
84#include <vm/vm_page.h>
85#include <vm/vm_pageout.h>
86#include <vm/vm_kern.h> /* kernel_memory_allocate() */
87#include <kern/misc_protos.h>
88#include <zone_debug.h>
3e170ce0 89#include <mach_debug/zone_info.h>
1c79356b 90#include <vm/cpm.h>
6d2010ae 91#include <pexpert/pexpert.h>
55e303ae 92
91447636 93#include <vm/vm_protos.h>
2d21ac55
A
94#include <vm/memory_object.h>
95#include <vm/vm_purgeable_internal.h>
39236c6e 96#include <vm/vm_compressor.h>
2d21ac55 97
fe8ab488
A
98#if CONFIG_PHANTOM_CACHE
99#include <vm/vm_phantom_cache.h>
100#endif
101
b0d623f7
A
102#include <IOKit/IOHibernatePrivate.h>
103
b0d623f7
A
104#include <sys/kdebug.h>
105
316670eb 106boolean_t hibernate_cleaning_in_progress = FALSE;
b0d623f7
A
107boolean_t vm_page_free_verify = TRUE;
108
6d2010ae
A
109uint32_t vm_lopage_free_count = 0;
110uint32_t vm_lopage_free_limit = 0;
111uint32_t vm_lopage_lowater = 0;
0b4c1975
A
112boolean_t vm_lopage_refill = FALSE;
113boolean_t vm_lopage_needed = FALSE;
114
b0d623f7
A
115lck_mtx_ext_t vm_page_queue_lock_ext;
116lck_mtx_ext_t vm_page_queue_free_lock_ext;
117lck_mtx_ext_t vm_purgeable_queue_lock_ext;
2d21ac55 118
0b4c1975
A
119int speculative_age_index = 0;
120int speculative_steal_index = 0;
2d21ac55
A
121struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
122
0b4e3aa0 123
b0d623f7
A
124__private_extern__ void vm_page_init_lck_grp(void);
125
6d2010ae
A
126static void vm_page_free_prepare(vm_page_t page);
127static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
128
3e170ce0 129static void vm_tag_init(void);
b0d623f7 130
3e170ce0 131uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
b0d623f7 132
1c79356b
A
133/*
134 * Associated with page of user-allocatable memory is a
135 * page structure.
136 */
137
138/*
139 * These variables record the values returned by vm_page_bootstrap,
140 * for debugging purposes. The implementation of pmap_steal_memory
141 * and pmap_startup here also uses them internally.
142 */
143
144vm_offset_t virtual_space_start;
145vm_offset_t virtual_space_end;
7ddcb079 146uint32_t vm_page_pages;
1c79356b
A
147
148/*
149 * The vm_page_lookup() routine, which provides for fast
150 * (virtual memory object, offset) to page lookup, employs
151 * the following hash table. The vm_page_{insert,remove}
152 * routines install and remove associations in the table.
153 * [This table is often called the virtual-to-physical,
154 * or VP, table.]
155 */
156typedef struct {
fe8ab488 157 vm_page_packed_t page_list;
1c79356b
A
158#if MACH_PAGE_HASH_STATS
159 int cur_count; /* current count */
160 int hi_count; /* high water mark */
161#endif /* MACH_PAGE_HASH_STATS */
162} vm_page_bucket_t;
163
b0d623f7
A
164
165#define BUCKETS_PER_LOCK 16
166
1c79356b
A
167vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
168unsigned int vm_page_bucket_count = 0; /* How big is array? */
169unsigned int vm_page_hash_mask; /* Mask for hash function */
170unsigned int vm_page_hash_shift; /* Shift for hash function */
2d21ac55 171uint32_t vm_page_bucket_hash; /* Basic bucket hash */
b0d623f7
A
172unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
173
174lck_spin_t *vm_page_bucket_locks;
3e170ce0
A
175lck_spin_t vm_objects_wired_lock;
176lck_spin_t vm_allocation_sites_lock;
1c79356b 177
15129b1c
A
178#if VM_PAGE_BUCKETS_CHECK
179boolean_t vm_page_buckets_check_ready = FALSE;
180#if VM_PAGE_FAKE_BUCKETS
181vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
182vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
183#endif /* VM_PAGE_FAKE_BUCKETS */
184#endif /* VM_PAGE_BUCKETS_CHECK */
91447636 185
3e170ce0
A
186extern int not_in_kdp;
187
188
1c79356b
A
189#if MACH_PAGE_HASH_STATS
190/* This routine is only for debug. It is intended to be called by
191 * hand by a developer using a kernel debugger. This routine prints
192 * out vm_page_hash table statistics to the kernel debug console.
193 */
194void
195hash_debug(void)
196{
197 int i;
198 int numbuckets = 0;
199 int highsum = 0;
200 int maxdepth = 0;
201
202 for (i = 0; i < vm_page_bucket_count; i++) {
203 if (vm_page_buckets[i].hi_count) {
204 numbuckets++;
205 highsum += vm_page_buckets[i].hi_count;
206 if (vm_page_buckets[i].hi_count > maxdepth)
207 maxdepth = vm_page_buckets[i].hi_count;
208 }
209 }
210 printf("Total number of buckets: %d\n", vm_page_bucket_count);
211 printf("Number used buckets: %d = %d%%\n",
212 numbuckets, 100*numbuckets/vm_page_bucket_count);
213 printf("Number unused buckets: %d = %d%%\n",
214 vm_page_bucket_count - numbuckets,
215 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
216 printf("Sum of bucket max depth: %d\n", highsum);
217 printf("Average bucket depth: %d.%2d\n",
218 highsum/vm_page_bucket_count,
219 highsum%vm_page_bucket_count);
220 printf("Maximum bucket depth: %d\n", maxdepth);
221}
222#endif /* MACH_PAGE_HASH_STATS */
223
224/*
225 * The virtual page size is currently implemented as a runtime
226 * variable, but is constant once initialized using vm_set_page_size.
227 * This initialization must be done in the machine-dependent
228 * bootstrap sequence, before calling other machine-independent
229 * initializations.
230 *
231 * All references to the virtual page size outside this
232 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
233 * constants.
234 */
55e303ae
A
235vm_size_t page_size = PAGE_SIZE;
236vm_size_t page_mask = PAGE_MASK;
2d21ac55 237int page_shift = PAGE_SHIFT;
1c79356b
A
238
239/*
240 * Resident page structures are initialized from
241 * a template (see vm_page_alloc).
242 *
243 * When adding a new field to the virtual memory
244 * object structure, be sure to add initialization
245 * (see vm_page_bootstrap).
246 */
247struct vm_page vm_page_template;
248
2d21ac55
A
249vm_page_t vm_pages = VM_PAGE_NULL;
250unsigned int vm_pages_count = 0;
0b4c1975 251ppnum_t vm_page_lowest = 0;
2d21ac55 252
1c79356b
A
253/*
254 * Resident pages that represent real memory
2d21ac55
A
255 * are allocated from a set of free lists,
256 * one per color.
1c79356b 257 */
2d21ac55
A
258unsigned int vm_colors;
259unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
260unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
fe8ab488 261unsigned int vm_free_magazine_refill_limit = 0;
2d21ac55 262queue_head_t vm_page_queue_free[MAX_COLORS];
1c79356b 263unsigned int vm_page_free_wanted;
2d21ac55 264unsigned int vm_page_free_wanted_privileged;
91447636
A
265unsigned int vm_page_free_count;
266unsigned int vm_page_fictitious_count;
1c79356b 267
1c79356b
A
268/*
269 * Occasionally, the virtual memory system uses
270 * resident page structures that do not refer to
271 * real pages, for example to leave a page with
272 * important state information in the VP table.
273 *
274 * These page structures are allocated the way
275 * most other kernel structures are.
276 */
277zone_t vm_page_zone;
b0d623f7
A
278vm_locks_array_t vm_page_locks;
279decl_lck_mtx_data(,vm_page_alloc_lock)
316670eb
A
280lck_mtx_ext_t vm_page_alloc_lock_ext;
281
9bccf70c 282unsigned int io_throttle_zero_fill;
1c79356b 283
b0d623f7
A
284unsigned int vm_page_local_q_count = 0;
285unsigned int vm_page_local_q_soft_limit = 250;
286unsigned int vm_page_local_q_hard_limit = 500;
287struct vplq *vm_page_local_q = NULL;
288
316670eb
A
289/* N.B. Guard and fictitious pages must not
290 * be assigned a zero phys_page value.
291 */
1c79356b
A
292/*
293 * Fictitious pages don't have a physical address,
55e303ae 294 * but we must initialize phys_page to something.
1c79356b
A
295 * For debugging, this should be a strange value
296 * that the pmap module can recognize in assertions.
297 */
b0d623f7 298ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
1c79356b 299
2d21ac55
A
300/*
301 * Guard pages are not accessible so they don't
302 * need a physical address, but we need to enter
303 * one in the pmap.
304 * Let's make it recognizable and make sure that
305 * we don't use a real physical page with that
306 * physical address.
307 */
b0d623f7 308ppnum_t vm_page_guard_addr = (ppnum_t) -2;
2d21ac55 309
1c79356b
A
310/*
311 * Resident page structures are also chained on
312 * queues that are used by the page replacement
313 * system (pageout daemon). These queues are
314 * defined here, but are shared by the pageout
9bccf70c 315 * module. The inactive queue is broken into
39236c6e 316 * file backed and anonymous for convenience as the
9bccf70c 317 * pageout daemon often assignes a higher
39236c6e 318 * importance to anonymous pages (less likely to pick)
1c79356b
A
319 */
320queue_head_t vm_page_queue_active;
321queue_head_t vm_page_queue_inactive;
316670eb 322queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
b0d623f7 323queue_head_t vm_page_queue_throttled;
2d21ac55 324
3e170ce0
A
325queue_head_t vm_objects_wired;
326
91447636
A
327unsigned int vm_page_active_count;
328unsigned int vm_page_inactive_count;
316670eb 329unsigned int vm_page_anonymous_count;
2d21ac55
A
330unsigned int vm_page_throttled_count;
331unsigned int vm_page_speculative_count;
3e170ce0 332
91447636 333unsigned int vm_page_wire_count;
3e170ce0 334unsigned int vm_page_stolen_count;
0b4c1975 335unsigned int vm_page_wire_count_initial;
3e170ce0 336unsigned int vm_page_pages_initial;
91447636 337unsigned int vm_page_gobble_count = 0;
fe8ab488
A
338
339#define VM_PAGE_WIRE_COUNT_WARNING 0
340#define VM_PAGE_GOBBLE_COUNT_WARNING 0
91447636
A
341
342unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
b0d623f7 343unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
91447636 344uint64_t vm_page_purged_count = 0; /* total count of purged pages */
1c79356b 345
fe8ab488 346unsigned int vm_page_xpmapped_external_count = 0;
39236c6e
A
347unsigned int vm_page_external_count = 0;
348unsigned int vm_page_internal_count = 0;
349unsigned int vm_page_pageable_external_count = 0;
350unsigned int vm_page_pageable_internal_count = 0;
351
b0d623f7 352#if DEVELOPMENT || DEBUG
2d21ac55
A
353unsigned int vm_page_speculative_recreated = 0;
354unsigned int vm_page_speculative_created = 0;
355unsigned int vm_page_speculative_used = 0;
b0d623f7 356#endif
2d21ac55 357
316670eb
A
358queue_head_t vm_page_queue_cleaned;
359
360unsigned int vm_page_cleaned_count = 0;
361unsigned int vm_pageout_enqueued_cleaned = 0;
362
0c530ab8 363uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
0b4c1975 364ppnum_t max_valid_low_ppnum = 0xffffffff;
0c530ab8
A
365
366
1c79356b
A
367/*
368 * Several page replacement parameters are also
369 * shared with this module, so that page allocation
370 * (done here in vm_page_alloc) can trigger the
371 * pageout daemon.
372 */
91447636
A
373unsigned int vm_page_free_target = 0;
374unsigned int vm_page_free_min = 0;
b0d623f7 375unsigned int vm_page_throttle_limit = 0;
91447636 376unsigned int vm_page_inactive_target = 0;
39236c6e 377unsigned int vm_page_anonymous_min = 0;
2d21ac55 378unsigned int vm_page_inactive_min = 0;
91447636 379unsigned int vm_page_free_reserved = 0;
b0d623f7 380unsigned int vm_page_throttle_count = 0;
1c79356b 381
316670eb 382
1c79356b
A
383/*
384 * The VM system has a couple of heuristics for deciding
385 * that pages are "uninteresting" and should be placed
386 * on the inactive queue as likely candidates for replacement.
387 * These variables let the heuristics be controlled at run-time
388 * to make experimentation easier.
389 */
390
391boolean_t vm_page_deactivate_hint = TRUE;
392
b0d623f7
A
393struct vm_page_stats_reusable vm_page_stats_reusable;
394
1c79356b
A
395/*
396 * vm_set_page_size:
397 *
398 * Sets the page size, perhaps based upon the memory
399 * size. Must be called before any use of page-size
400 * dependent functions.
401 *
402 * Sets page_shift and page_mask from page_size.
403 */
404void
405vm_set_page_size(void)
406{
fe8ab488
A
407 page_size = PAGE_SIZE;
408 page_mask = PAGE_MASK;
409 page_shift = PAGE_SHIFT;
1c79356b
A
410
411 if ((page_mask & page_size) != 0)
412 panic("vm_set_page_size: page size not a power of two");
413
414 for (page_shift = 0; ; page_shift++)
91447636 415 if ((1U << page_shift) == page_size)
1c79356b 416 break;
1c79356b
A
417}
418
fe8ab488
A
419#define COLOR_GROUPS_TO_STEAL 4
420
2d21ac55
A
421
422/* Called once during statup, once the cache geometry is known.
423 */
424static void
425vm_page_set_colors( void )
426{
427 unsigned int n, override;
428
593a1d5f 429 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
2d21ac55
A
430 n = override;
431 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
432 n = vm_cache_geometry_colors;
433 else n = DEFAULT_COLORS; /* use default if all else fails */
434
435 if ( n == 0 )
436 n = 1;
437 if ( n > MAX_COLORS )
438 n = MAX_COLORS;
439
440 /* the count must be a power of 2 */
b0d623f7 441 if ( ( n & (n - 1)) != 0 )
2d21ac55
A
442 panic("vm_page_set_colors");
443
444 vm_colors = n;
445 vm_color_mask = n - 1;
fe8ab488
A
446
447 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
2d21ac55
A
448}
449
450
b0d623f7
A
451lck_grp_t vm_page_lck_grp_free;
452lck_grp_t vm_page_lck_grp_queue;
453lck_grp_t vm_page_lck_grp_local;
454lck_grp_t vm_page_lck_grp_purge;
455lck_grp_t vm_page_lck_grp_alloc;
456lck_grp_t vm_page_lck_grp_bucket;
457lck_grp_attr_t vm_page_lck_grp_attr;
458lck_attr_t vm_page_lck_attr;
459
460
461__private_extern__ void
462vm_page_init_lck_grp(void)
463{
464 /*
465 * initialze the vm_page lock world
466 */
467 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
468 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
469 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
470 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
471 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
472 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
473 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
474 lck_attr_setdefault(&vm_page_lck_attr);
316670eb 475 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
39236c6e
A
476
477 vm_compressor_init_locks();
b0d623f7
A
478}
479
480void
481vm_page_init_local_q()
482{
483 unsigned int num_cpus;
484 unsigned int i;
485 struct vplq *t_local_q;
486
487 num_cpus = ml_get_max_cpus();
488
489 /*
490 * no point in this for a uni-processor system
491 */
492 if (num_cpus >= 2) {
493 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
494
495 for (i = 0; i < num_cpus; i++) {
496 struct vpl *lq;
497
498 lq = &t_local_q[i].vpl_un.vpl;
499 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
500 queue_init(&lq->vpl_queue);
501 lq->vpl_count = 0;
39236c6e
A
502 lq->vpl_internal_count = 0;
503 lq->vpl_external_count = 0;
b0d623f7
A
504 }
505 vm_page_local_q_count = num_cpus;
506
507 vm_page_local_q = (struct vplq *)t_local_q;
508 }
509}
510
511
1c79356b
A
512/*
513 * vm_page_bootstrap:
514 *
515 * Initializes the resident memory module.
516 *
517 * Allocates memory for the page cells, and
518 * for the object/offset-to-page hash table headers.
519 * Each page cell is initialized and placed on the free list.
520 * Returns the range of available kernel virtual memory.
521 */
522
523void
524vm_page_bootstrap(
525 vm_offset_t *startp,
526 vm_offset_t *endp)
527{
528 register vm_page_t m;
91447636 529 unsigned int i;
1c79356b
A
530 unsigned int log1;
531 unsigned int log2;
532 unsigned int size;
533
534 /*
535 * Initialize the vm_page template.
536 */
537
538 m = &vm_page_template;
b0d623f7 539 bzero(m, sizeof (*m));
1c79356b 540
91447636
A
541 m->pageq.next = NULL;
542 m->pageq.prev = NULL;
543 m->listq.next = NULL;
544 m->listq.prev = NULL;
fe8ab488 545 m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
91447636 546
b0d623f7
A
547 m->object = VM_OBJECT_NULL; /* reset later */
548 m->offset = (vm_object_offset_t) -1; /* reset later */
549
550 m->wire_count = 0;
551 m->local = FALSE;
1c79356b
A
552 m->inactive = FALSE;
553 m->active = FALSE;
b0d623f7
A
554 m->pageout_queue = FALSE;
555 m->speculative = FALSE;
1c79356b
A
556 m->laundry = FALSE;
557 m->free = FALSE;
558 m->reference = FALSE;
b0d623f7
A
559 m->gobbled = FALSE;
560 m->private = FALSE;
561 m->throttled = FALSE;
562 m->__unused_pageq_bits = 0;
563
564 m->phys_page = 0; /* reset later */
1c79356b
A
565
566 m->busy = TRUE;
567 m->wanted = FALSE;
568 m->tabled = FALSE;
15129b1c 569 m->hashed = FALSE;
1c79356b 570 m->fictitious = FALSE;
b0d623f7
A
571 m->pmapped = FALSE;
572 m->wpmapped = FALSE;
573 m->pageout = FALSE;
1c79356b
A
574 m->absent = FALSE;
575 m->error = FALSE;
576 m->dirty = FALSE;
577 m->cleaning = FALSE;
578 m->precious = FALSE;
579 m->clustered = FALSE;
b0d623f7 580 m->overwriting = FALSE;
1c79356b 581 m->restart = FALSE;
b0d623f7 582 m->unusual = FALSE;
91447636 583 m->encrypted = FALSE;
2d21ac55 584 m->encrypted_cleaning = FALSE;
b0d623f7
A
585 m->cs_validated = FALSE;
586 m->cs_tainted = FALSE;
c18c124e 587 m->cs_nx = FALSE;
b0d623f7 588 m->no_cache = FALSE;
b0d623f7 589 m->reusable = FALSE;
6d2010ae 590 m->slid = FALSE;
39236c6e
A
591 m->xpmapped = FALSE;
592 m->compressor = FALSE;
15129b1c 593 m->written_by_kernel = FALSE;
b0d623f7 594 m->__unused_object_bits = 0;
1c79356b 595
1c79356b
A
596 /*
597 * Initialize the page queues.
598 */
b0d623f7
A
599 vm_page_init_lck_grp();
600
601 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
602 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
603 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
2d21ac55
A
604
605 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
606 int group;
607
608 purgeable_queues[i].token_q_head = 0;
609 purgeable_queues[i].token_q_tail = 0;
610 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
611 queue_init(&purgeable_queues[i].objq[group]);
612
613 purgeable_queues[i].type = i;
614 purgeable_queues[i].new_pages = 0;
615#if MACH_ASSERT
616 purgeable_queues[i].debug_count_tokens = 0;
617 purgeable_queues[i].debug_count_objects = 0;
618#endif
619 };
fe8ab488
A
620 purgeable_nonvolatile_count = 0;
621 queue_init(&purgeable_nonvolatile_queue);
2d21ac55
A
622
623 for (i = 0; i < MAX_COLORS; i++ )
624 queue_init(&vm_page_queue_free[i]);
6d2010ae 625
2d21ac55 626 queue_init(&vm_lopage_queue_free);
1c79356b
A
627 queue_init(&vm_page_queue_active);
628 queue_init(&vm_page_queue_inactive);
316670eb 629 queue_init(&vm_page_queue_cleaned);
2d21ac55 630 queue_init(&vm_page_queue_throttled);
316670eb 631 queue_init(&vm_page_queue_anonymous);
3e170ce0 632 queue_init(&vm_objects_wired);
1c79356b 633
2d21ac55
A
634 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
635 queue_init(&vm_page_queue_speculative[i].age_q);
636
637 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
638 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
639 }
1c79356b 640 vm_page_free_wanted = 0;
2d21ac55
A
641 vm_page_free_wanted_privileged = 0;
642
643 vm_page_set_colors();
644
1c79356b
A
645
646 /*
647 * Steal memory for the map and zone subsystems.
648 */
3e170ce0 649 kernel_debug_string_simple("zone_steal_memory");
1c79356b 650 zone_steal_memory();
3e170ce0 651 kernel_debug_string_simple("vm_map_steal_memory");
316670eb 652 vm_map_steal_memory();
1c79356b
A
653
654 /*
655 * Allocate (and initialize) the virtual-to-physical
656 * table hash buckets.
657 *
658 * The number of buckets should be a power of two to
659 * get a good hash function. The following computation
660 * chooses the first power of two that is greater
661 * than the number of physical pages in the system.
662 */
663
1c79356b
A
664 if (vm_page_bucket_count == 0) {
665 unsigned int npages = pmap_free_pages();
666
667 vm_page_bucket_count = 1;
668 while (vm_page_bucket_count < npages)
669 vm_page_bucket_count <<= 1;
670 }
b0d623f7 671 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
1c79356b
A
672
673 vm_page_hash_mask = vm_page_bucket_count - 1;
674
675 /*
676 * Calculate object shift value for hashing algorithm:
677 * O = log2(sizeof(struct vm_object))
678 * B = log2(vm_page_bucket_count)
679 * hash shifts the object left by
680 * B/2 - O
681 */
682 size = vm_page_bucket_count;
683 for (log1 = 0; size > 1; log1++)
684 size /= 2;
685 size = sizeof(struct vm_object);
686 for (log2 = 0; size > 1; log2++)
687 size /= 2;
688 vm_page_hash_shift = log1/2 - log2 + 1;
55e303ae
A
689
690 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
691 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
692 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to insure unique series */
1c79356b
A
693
694 if (vm_page_hash_mask & vm_page_bucket_count)
695 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
696
15129b1c
A
697#if VM_PAGE_BUCKETS_CHECK
698#if VM_PAGE_FAKE_BUCKETS
699 /*
700 * Allocate a decoy set of page buckets, to detect
701 * any stomping there.
702 */
703 vm_page_fake_buckets = (vm_page_bucket_t *)
704 pmap_steal_memory(vm_page_bucket_count *
705 sizeof(vm_page_bucket_t));
706 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
707 vm_page_fake_buckets_end =
708 vm_map_round_page((vm_page_fake_buckets_start +
709 (vm_page_bucket_count *
710 sizeof (vm_page_bucket_t))),
711 PAGE_MASK);
712 char *cp;
713 for (cp = (char *)vm_page_fake_buckets_start;
714 cp < (char *)vm_page_fake_buckets_end;
715 cp++) {
716 *cp = 0x5a;
717 }
718#endif /* VM_PAGE_FAKE_BUCKETS */
719#endif /* VM_PAGE_BUCKETS_CHECK */
720
3e170ce0 721 kernel_debug_string_simple("vm_page_buckets");
1c79356b
A
722 vm_page_buckets = (vm_page_bucket_t *)
723 pmap_steal_memory(vm_page_bucket_count *
724 sizeof(vm_page_bucket_t));
725
3e170ce0 726 kernel_debug_string_simple("vm_page_bucket_locks");
b0d623f7
A
727 vm_page_bucket_locks = (lck_spin_t *)
728 pmap_steal_memory(vm_page_bucket_lock_count *
729 sizeof(lck_spin_t));
730
1c79356b
A
731 for (i = 0; i < vm_page_bucket_count; i++) {
732 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
733
fe8ab488 734 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1c79356b
A
735#if MACH_PAGE_HASH_STATS
736 bucket->cur_count = 0;
737 bucket->hi_count = 0;
738#endif /* MACH_PAGE_HASH_STATS */
739 }
740
b0d623f7
A
741 for (i = 0; i < vm_page_bucket_lock_count; i++)
742 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
743
3e170ce0
A
744 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
745 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
746 vm_tag_init();
747
15129b1c
A
748#if VM_PAGE_BUCKETS_CHECK
749 vm_page_buckets_check_ready = TRUE;
750#endif /* VM_PAGE_BUCKETS_CHECK */
751
1c79356b
A
752 /*
753 * Machine-dependent code allocates the resident page table.
754 * It uses vm_page_init to initialize the page frames.
755 * The code also returns to us the virtual space available
756 * to the kernel. We don't trust the pmap module
757 * to get the alignment right.
758 */
759
3e170ce0 760 kernel_debug_string_simple("pmap_startup");
1c79356b 761 pmap_startup(&virtual_space_start, &virtual_space_end);
91447636
A
762 virtual_space_start = round_page(virtual_space_start);
763 virtual_space_end = trunc_page(virtual_space_end);
1c79356b
A
764
765 *startp = virtual_space_start;
766 *endp = virtual_space_end;
767
768 /*
769 * Compute the initial "wire" count.
770 * Up until now, the pages which have been set aside are not under
771 * the VM system's control, so although they aren't explicitly
772 * wired, they nonetheless can't be moved. At this moment,
773 * all VM managed pages are "free", courtesy of pmap_startup.
774 */
b0d623f7 775 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
0b4c1975
A
776 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
777 vm_page_wire_count_initial = vm_page_wire_count;
3e170ce0 778 vm_page_pages_initial = vm_page_pages;
91447636 779
2d21ac55
A
780 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
781 vm_page_free_count, vm_page_wire_count);
782
3e170ce0 783 kernel_debug_string_simple("vm_page_bootstrap complete");
91447636 784 simple_lock_init(&vm_paging_lock, 0);
1c79356b
A
785}
786
787#ifndef MACHINE_PAGES
788/*
789 * We implement pmap_steal_memory and pmap_startup with the help
790 * of two simpler functions, pmap_virtual_space and pmap_next_page.
791 */
792
91447636 793void *
1c79356b
A
794pmap_steal_memory(
795 vm_size_t size)
796{
55e303ae
A
797 vm_offset_t addr, vaddr;
798 ppnum_t phys_page;
1c79356b
A
799
800 /*
801 * We round the size to a round multiple.
802 */
803
804 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
805
806 /*
807 * If this is the first call to pmap_steal_memory,
808 * we have to initialize ourself.
809 */
810
811 if (virtual_space_start == virtual_space_end) {
812 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
813
814 /*
815 * The initial values must be aligned properly, and
816 * we don't trust the pmap module to do it right.
817 */
818
91447636
A
819 virtual_space_start = round_page(virtual_space_start);
820 virtual_space_end = trunc_page(virtual_space_end);
1c79356b
A
821 }
822
823 /*
824 * Allocate virtual memory for this request.
825 */
826
827 addr = virtual_space_start;
828 virtual_space_start += size;
829
6d2010ae 830 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1c79356b
A
831
832 /*
833 * Allocate and map physical pages to back new virtual pages.
834 */
835
91447636 836 for (vaddr = round_page(addr);
1c79356b
A
837 vaddr < addr + size;
838 vaddr += PAGE_SIZE) {
b0d623f7 839
0b4c1975 840 if (!pmap_next_page_hi(&phys_page))
1c79356b
A
841 panic("pmap_steal_memory");
842
843 /*
844 * XXX Logically, these mappings should be wired,
845 * but some pmap modules barf if they are.
846 */
b0d623f7
A
847#if defined(__LP64__)
848 pmap_pre_expand(kernel_pmap, vaddr);
849#endif
1c79356b 850
55e303ae 851 pmap_enter(kernel_pmap, vaddr, phys_page,
316670eb 852 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
9bccf70c 853 VM_WIMG_USE_DEFAULT, FALSE);
1c79356b
A
854 /*
855 * Account for newly stolen memory
856 */
857 vm_page_wire_count++;
3e170ce0 858 vm_page_stolen_count++;
1c79356b
A
859 }
860
91447636 861 return (void *) addr;
1c79356b
A
862}
863
fe8ab488 864void vm_page_release_startup(vm_page_t mem);
1c79356b
A
865void
866pmap_startup(
867 vm_offset_t *startp,
868 vm_offset_t *endp)
869{
55e303ae 870 unsigned int i, npages, pages_initialized, fill, fillval;
55e303ae
A
871 ppnum_t phys_page;
872 addr64_t tmpaddr;
1c79356b 873
fe8ab488
A
874
875#if defined(__LP64__)
876 /*
877 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
878 */
879 assert(sizeof(struct vm_page) == 64);
880
881 /*
882 * make sure we are aligned on a 64 byte boundary
883 * for VM_PAGE_PACK_PTR (it clips off the low-order
884 * 6 bits of the pointer)
885 */
886 if (virtual_space_start != virtual_space_end)
887 virtual_space_start = round_page(virtual_space_start);
888#endif
889
1c79356b
A
890 /*
891 * We calculate how many page frames we will have
892 * and then allocate the page structures in one chunk.
893 */
894
55e303ae 895 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
b0d623f7 896 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
2d21ac55 897 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
1c79356b 898
2d21ac55 899 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
1c79356b
A
900
901 /*
902 * Initialize the page frames.
903 */
3e170ce0 904 kernel_debug_string_simple("Initialize the page frames");
1c79356b 905 for (i = 0, pages_initialized = 0; i < npages; i++) {
55e303ae 906 if (!pmap_next_page(&phys_page))
1c79356b 907 break;
0b4c1975
A
908 if (pages_initialized == 0 || phys_page < vm_page_lowest)
909 vm_page_lowest = phys_page;
1c79356b 910
0b4c1975 911 vm_page_init(&vm_pages[i], phys_page, FALSE);
1c79356b
A
912 vm_page_pages++;
913 pages_initialized++;
914 }
2d21ac55 915 vm_pages_count = pages_initialized;
1c79356b 916
fe8ab488
A
917#if defined(__LP64__)
918
919 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
920 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
921
922 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
923 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
924#endif
3e170ce0 925 kernel_debug_string_simple("page fill/release");
0c530ab8
A
926 /*
927 * Check if we want to initialize pages to a known value
928 */
929 fill = 0; /* Assume no fill */
593a1d5f 930 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
316670eb
A
931#if DEBUG
932 /* This slows down booting the DEBUG kernel, particularly on
933 * large memory systems, but is worthwhile in deterministically
934 * trapping uninitialized memory usage.
935 */
936 if (fill == 0) {
937 fill = 1;
938 fillval = 0xDEB8F177;
939 }
940#endif
941 if (fill)
942 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
0c530ab8
A
943 // -debug code remove
944 if (2 == vm_himemory_mode) {
945 // free low -> high so high is preferred
0b4c1975 946 for (i = 1; i <= pages_initialized; i++) {
2d21ac55 947 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */
fe8ab488 948 vm_page_release_startup(&vm_pages[i - 1]);
0c530ab8
A
949 }
950 }
951 else
952 // debug code remove-
953
1c79356b
A
954 /*
955 * Release pages in reverse order so that physical pages
956 * initially get allocated in ascending addresses. This keeps
957 * the devices (which must address physical memory) happy if
958 * they require several consecutive pages.
959 */
0b4c1975 960 for (i = pages_initialized; i > 0; i--) {
2d21ac55 961 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */
fe8ab488 962 vm_page_release_startup(&vm_pages[i - 1]);
1c79356b
A
963 }
964
fe8ab488
A
965 VM_CHECK_MEMORYSTATUS;
966
55e303ae
A
967#if 0
968 {
969 vm_page_t xx, xxo, xxl;
2d21ac55 970 int i, j, k, l;
55e303ae
A
971
972 j = 0; /* (BRINGUP) */
973 xxl = 0;
974
2d21ac55
A
975 for( i = 0; i < vm_colors; i++ ) {
976 queue_iterate(&vm_page_queue_free[i],
977 xx,
978 vm_page_t,
979 pageq) { /* BRINGUP */
980 j++; /* (BRINGUP) */
981 if(j > vm_page_free_count) { /* (BRINGUP) */
982 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
55e303ae 983 }
2d21ac55
A
984
985 l = vm_page_free_count - j; /* (BRINGUP) */
986 k = 0; /* (BRINGUP) */
987
988 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
989
990 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
991 k++;
992 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
993 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
994 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
995 }
996 }
997
998 xxl = xx;
55e303ae
A
999 }
1000 }
1001
1002 if(j != vm_page_free_count) { /* (BRINGUP) */
1003 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
1004 }
1005 }
1006#endif
1007
1008
1c79356b
A
1009 /*
1010 * We have to re-align virtual_space_start,
1011 * because pmap_steal_memory has been using it.
1012 */
1013
b0d623f7 1014 virtual_space_start = round_page(virtual_space_start);
1c79356b
A
1015
1016 *startp = virtual_space_start;
1017 *endp = virtual_space_end;
1018}
1019#endif /* MACHINE_PAGES */
1020
1021/*
1022 * Routine: vm_page_module_init
1023 * Purpose:
1024 * Second initialization pass, to be done after
1025 * the basic VM system is ready.
1026 */
1027void
1028vm_page_module_init(void)
1029{
3e170ce0 1030 uint64_t vm_page_zone_pages, vm_page_zone_data_size;
1c79356b
A
1031 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
1032 0, PAGE_SIZE, "vm pages");
1033
1034#if ZONE_DEBUG
1035 zone_debug_disable(vm_page_zone);
1036#endif /* ZONE_DEBUG */
1037
6d2010ae 1038 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1c79356b
A
1039 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1040 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1041 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
316670eb 1042 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
3e170ce0
A
1043 /*
1044 * Adjust zone statistics to account for the real pages allocated
1045 * in vm_page_create(). [Q: is this really what we want?]
1046 */
1047 vm_page_zone->count += vm_page_pages;
1048 vm_page_zone->sum_count += vm_page_pages;
1049 vm_page_zone_data_size = vm_page_pages * vm_page_zone->elem_size;
1050 vm_page_zone->cur_size += vm_page_zone_data_size;
1051 vm_page_zone_pages = ((round_page(vm_page_zone_data_size)) / PAGE_SIZE);
1052 OSAddAtomic64(vm_page_zone_pages, &(vm_page_zone->page_count));
1053 /* since zone accounts for these, take them out of stolen */
1054 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1c79356b
A
1055}
1056
1057/*
1058 * Routine: vm_page_create
1059 * Purpose:
1060 * After the VM system is up, machine-dependent code
1061 * may stumble across more physical memory. For example,
1062 * memory that it was reserving for a frame buffer.
1063 * vm_page_create turns this memory into available pages.
1064 */
1065
1066void
1067vm_page_create(
55e303ae
A
1068 ppnum_t start,
1069 ppnum_t end)
1c79356b 1070{
55e303ae
A
1071 ppnum_t phys_page;
1072 vm_page_t m;
1c79356b 1073
55e303ae
A
1074 for (phys_page = start;
1075 phys_page < end;
1076 phys_page++) {
6d2010ae 1077 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1c79356b
A
1078 == VM_PAGE_NULL)
1079 vm_page_more_fictitious();
1080
6d2010ae 1081 m->fictitious = FALSE;
0b4c1975 1082 pmap_clear_noencrypt(phys_page);
6d2010ae 1083
1c79356b
A
1084 vm_page_pages++;
1085 vm_page_release(m);
1086 }
1087}
1088
1089/*
1090 * vm_page_hash:
1091 *
1092 * Distributes the object/offset key pair among hash buckets.
1093 *
55e303ae 1094 * NOTE: The bucket count must be a power of 2
1c79356b
A
1095 */
1096#define vm_page_hash(object, offset) (\
b0d623f7 1097 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1c79356b
A
1098 & vm_page_hash_mask)
1099
2d21ac55 1100
1c79356b
A
1101/*
1102 * vm_page_insert: [ internal use only ]
1103 *
1104 * Inserts the given mem entry into the object/object-page
1105 * table and object list.
1106 *
1107 * The object must be locked.
1108 */
1c79356b
A
1109void
1110vm_page_insert(
2d21ac55
A
1111 vm_page_t mem,
1112 vm_object_t object,
1113 vm_object_offset_t offset)
1114{
3e170ce0
A
1115 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1116}
1117
1118void
1119vm_page_insert_wired(
1120 vm_page_t mem,
1121 vm_object_t object,
1122 vm_object_offset_t offset,
1123 vm_tag_t tag)
1124{
1125 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
2d21ac55
A
1126}
1127
4a3eedf9 1128void
2d21ac55
A
1129vm_page_insert_internal(
1130 vm_page_t mem,
1131 vm_object_t object,
1132 vm_object_offset_t offset,
3e170ce0 1133 vm_tag_t tag,
b0d623f7 1134 boolean_t queues_lock_held,
316670eb 1135 boolean_t insert_in_hash,
3e170ce0
A
1136 boolean_t batch_pmap_op,
1137 boolean_t batch_accounting,
1138 uint64_t *delayed_ledger_update)
1c79356b 1139{
fe8ab488
A
1140 vm_page_bucket_t *bucket;
1141 lck_spin_t *bucket_lock;
1142 int hash_id;
1143 task_t owner;
1c79356b
A
1144
1145 XPR(XPR_VM_PAGE,
1146 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7 1147 object, offset, mem, 0,0);
316670eb
A
1148#if 0
1149 /*
1150 * we may not hold the page queue lock
1151 * so this check isn't safe to make
1152 */
1c79356b 1153 VM_PAGE_CHECK(mem);
316670eb 1154#endif
1c79356b 1155
39236c6e
A
1156 assert(page_aligned(offset));
1157
3e170ce0
A
1158 assert(!VM_PAGE_WIRED(mem) || mem->private || mem->fictitious || (tag != VM_KERN_MEMORY_NONE));
1159
fe8ab488
A
1160 /* the vm_submap_object is only a placeholder for submaps */
1161 assert(object != vm_submap_object);
2d21ac55
A
1162
1163 vm_object_lock_assert_exclusive(object);
1164#if DEBUG
b0d623f7
A
1165 lck_mtx_assert(&vm_page_queue_lock,
1166 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1167 : LCK_MTX_ASSERT_NOTOWNED);
1168#endif /* DEBUG */
3e170ce0 1169
b0d623f7 1170 if (insert_in_hash == TRUE) {
15129b1c 1171#if DEBUG || VM_PAGE_CHECK_BUCKETS
b0d623f7
A
1172 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1173 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1174 "already in (obj=%p,off=0x%llx)",
1175 mem, object, offset, mem->object, mem->offset);
91447636 1176#endif
6d2010ae 1177 assert(!object->internal || offset < object->vo_size);
1c79356b 1178
b0d623f7
A
1179 /* only insert "pageout" pages into "pageout" objects,
1180 * and normal pages into normal objects */
3e170ce0
A
1181#if 00
1182 /*
1183 * For some reason, this assertion gets tripped
1184 * but it's mostly harmless, so let's disable it
1185 * for now.
1186 */
b0d623f7 1187 assert(object->pageout == mem->pageout);
3e170ce0 1188#endif /* 00 */
91447636 1189
b0d623f7
A
1190 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1191
1192 /*
1193 * Record the object/offset pair in this page
1194 */
1c79356b 1195
b0d623f7
A
1196 mem->object = object;
1197 mem->offset = offset;
1c79356b 1198
b0d623f7
A
1199 /*
1200 * Insert it into the object_object/offset hash table
1201 */
1202 hash_id = vm_page_hash(object, offset);
1203 bucket = &vm_page_buckets[hash_id];
1204 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1205
1206 lck_spin_lock(bucket_lock);
1c79356b 1207
fe8ab488
A
1208 mem->next_m = bucket->page_list;
1209 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1210 assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));
1211
1c79356b 1212#if MACH_PAGE_HASH_STATS
b0d623f7
A
1213 if (++bucket->cur_count > bucket->hi_count)
1214 bucket->hi_count = bucket->cur_count;
1c79356b 1215#endif /* MACH_PAGE_HASH_STATS */
15129b1c 1216 mem->hashed = TRUE;
b0d623f7
A
1217 lck_spin_unlock(bucket_lock);
1218 }
6d2010ae 1219
316670eb
A
1220 {
1221 unsigned int cache_attr;
6d2010ae
A
1222
1223 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1224
1225 if (cache_attr != VM_WIMG_USE_DEFAULT) {
316670eb 1226 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
6d2010ae
A
1227 }
1228 }
1c79356b
A
1229 /*
1230 * Now link into the object's list of backed pages.
1231 */
3e170ce0
A
1232 queue_enter(&object->memq, mem, vm_page_t, listq);
1233 object->memq_hint = mem;
1c79356b
A
1234 mem->tabled = TRUE;
1235
1236 /*
1237 * Show that the object has one more resident page.
1238 */
1239
1240 object->resident_page_count++;
b0d623f7 1241 if (VM_PAGE_WIRED(mem)) {
3e170ce0
A
1242 if (!mem->private && !mem->fictitious)
1243 {
1244 if (!object->wired_page_count)
1245 {
1246 assert(VM_KERN_MEMORY_NONE != tag);
1247 object->wire_tag = tag;
1248 VM_OBJECT_WIRED(object);
1249 }
1250 }
1251 object->wired_page_count++;
b0d623f7
A
1252 }
1253 assert(object->resident_page_count >= object->wired_page_count);
91447636 1254
3e170ce0
A
1255 if (batch_accounting == FALSE) {
1256 if (object->internal) {
1257 OSAddAtomic(1, &vm_page_internal_count);
1258 } else {
1259 OSAddAtomic(1, &vm_page_external_count);
1260 }
39236c6e
A
1261 }
1262
1263 /*
1264 * It wouldn't make sense to insert a "reusable" page in
1265 * an object (the page would have been marked "reusable" only
1266 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1267 * in the object at that time).
1268 * But a page could be inserted in a "all_reusable" object, if
1269 * something faults it in (a vm_read() from another task or a
1270 * "use-after-free" issue in user space, for example). It can
1271 * also happen if we're relocating a page from that object to
1272 * a different physical page during a physically-contiguous
1273 * allocation.
1274 */
b0d623f7 1275 assert(!mem->reusable);
39236c6e
A
1276 if (mem->object->all_reusable) {
1277 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1278 }
2d21ac55 1279
fe8ab488
A
1280 if (object->purgable == VM_PURGABLE_DENY) {
1281 owner = TASK_NULL;
1282 } else {
1283 owner = object->vo_purgeable_owner;
1284 }
1285 if (owner &&
1286 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1287 VM_PAGE_WIRED(mem))) {
3e170ce0
A
1288
1289 if (delayed_ledger_update)
1290 *delayed_ledger_update += PAGE_SIZE;
1291 else {
1292 /* more non-volatile bytes */
1293 ledger_credit(owner->ledger,
1294 task_ledgers.purgeable_nonvolatile,
1295 PAGE_SIZE);
1296 /* more footprint */
1297 ledger_credit(owner->ledger,
1298 task_ledgers.phys_footprint,
1299 PAGE_SIZE);
1300 }
fe8ab488
A
1301
1302 } else if (owner &&
1303 (object->purgable == VM_PURGABLE_VOLATILE ||
1304 object->purgable == VM_PURGABLE_EMPTY)) {
1305 assert(! VM_PAGE_WIRED(mem));
1306 /* more volatile bytes */
1307 ledger_credit(owner->ledger,
1308 task_ledgers.purgeable_volatile,
1309 PAGE_SIZE);
1310 }
1311
b0d623f7
A
1312 if (object->purgable == VM_PURGABLE_VOLATILE) {
1313 if (VM_PAGE_WIRED(mem)) {
fe8ab488 1314 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
b0d623f7 1315 } else {
fe8ab488 1316 OSAddAtomic(+1, &vm_page_purgeable_count);
b0d623f7 1317 }
593a1d5f
A
1318 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1319 mem->throttled) {
b0d623f7
A
1320 /*
1321 * This page belongs to a purged VM object but hasn't
1322 * been purged (because it was "busy").
1323 * It's in the "throttled" queue and hence not
1324 * visible to vm_pageout_scan(). Move it to a pageable
1325 * queue, so that it can eventually be reclaimed, instead
1326 * of lingering in the "empty" object.
1327 */
593a1d5f 1328 if (queues_lock_held == FALSE)
b0d623f7 1329 vm_page_lockspin_queues();
593a1d5f 1330 vm_page_deactivate(mem);
2d21ac55
A
1331 if (queues_lock_held == FALSE)
1332 vm_page_unlock_queues();
91447636 1333 }
fe8ab488
A
1334
1335#if VM_OBJECT_TRACKING_OP_MODIFIED
1336 if (vm_object_tracking_inited &&
1337 object->internal &&
1338 object->resident_page_count == 0 &&
1339 object->pager == NULL &&
1340 object->shadow != NULL &&
1341 object->shadow->copy == object) {
1342 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1343 int numsaved = 0;
1344
1345 numsaved =OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1346 btlog_add_entry(vm_object_tracking_btlog,
1347 object,
1348 VM_OBJECT_TRACKING_OP_MODIFIED,
1349 bt,
1350 numsaved);
1351 }
1352#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1c79356b
A
1353}
1354
1355/*
1356 * vm_page_replace:
1357 *
1358 * Exactly like vm_page_insert, except that we first
1359 * remove any existing page at the given offset in object.
1360 *
b0d623f7 1361 * The object must be locked.
1c79356b 1362 */
1c79356b
A
1363void
1364vm_page_replace(
1365 register vm_page_t mem,
1366 register vm_object_t object,
1367 register vm_object_offset_t offset)
1368{
0c530ab8
A
1369 vm_page_bucket_t *bucket;
1370 vm_page_t found_m = VM_PAGE_NULL;
b0d623f7
A
1371 lck_spin_t *bucket_lock;
1372 int hash_id;
1c79356b 1373
316670eb
A
1374#if 0
1375 /*
1376 * we don't hold the page queue lock
1377 * so this check isn't safe to make
1378 */
1c79356b 1379 VM_PAGE_CHECK(mem);
316670eb 1380#endif
2d21ac55 1381 vm_object_lock_assert_exclusive(object);
15129b1c 1382#if DEBUG || VM_PAGE_CHECK_BUCKETS
91447636
A
1383 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1384 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1385 "already in (obj=%p,off=0x%llx)",
1386 mem, object, offset, mem->object, mem->offset);
b0d623f7 1387 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
91447636 1388#endif
1c79356b
A
1389 /*
1390 * Record the object/offset pair in this page
1391 */
1392
1393 mem->object = object;
1394 mem->offset = offset;
1395
1396 /*
1397 * Insert it into the object_object/offset hash table,
1398 * replacing any page that might have been there.
1399 */
1400
b0d623f7
A
1401 hash_id = vm_page_hash(object, offset);
1402 bucket = &vm_page_buckets[hash_id];
1403 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1404
1405 lck_spin_lock(bucket_lock);
0c530ab8 1406
fe8ab488
A
1407 if (bucket->page_list) {
1408 vm_page_packed_t *mp = &bucket->page_list;
1409 vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);
0c530ab8 1410
1c79356b
A
1411 do {
1412 if (m->object == object && m->offset == offset) {
1413 /*
0c530ab8 1414 * Remove old page from hash list
1c79356b 1415 */
fe8ab488 1416 *mp = m->next_m;
15129b1c 1417 m->hashed = FALSE;
1c79356b 1418
0c530ab8 1419 found_m = m;
1c79356b
A
1420 break;
1421 }
fe8ab488
A
1422 mp = &m->next_m;
1423 } while ((m = VM_PAGE_UNPACK_PTR(*mp)));
0c530ab8 1424
fe8ab488 1425 mem->next_m = bucket->page_list;
1c79356b 1426 } else {
fe8ab488 1427 mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1c79356b 1428 }
0c530ab8
A
1429 /*
1430 * insert new page at head of hash list
1431 */
fe8ab488 1432 bucket->page_list = VM_PAGE_PACK_PTR(mem);
15129b1c 1433 mem->hashed = TRUE;
0c530ab8 1434
b0d623f7 1435 lck_spin_unlock(bucket_lock);
1c79356b 1436
0c530ab8
A
1437 if (found_m) {
1438 /*
1439 * there was already a page at the specified
1440 * offset for this object... remove it from
1441 * the object and free it back to the free list
1442 */
b0d623f7 1443 vm_page_free_unlocked(found_m, FALSE);
91447636 1444 }
3e170ce0 1445 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
1c79356b
A
1446}
1447
1448/*
1449 * vm_page_remove: [ internal use only ]
1450 *
1451 * Removes the given mem entry from the object/offset-page
1452 * table and the object page list.
1453 *
b0d623f7 1454 * The object must be locked.
1c79356b
A
1455 */
1456
1457void
1458vm_page_remove(
b0d623f7
A
1459 vm_page_t mem,
1460 boolean_t remove_from_hash)
1c79356b 1461{
b0d623f7
A
1462 vm_page_bucket_t *bucket;
1463 vm_page_t this;
1464 lck_spin_t *bucket_lock;
1465 int hash_id;
fe8ab488 1466 task_t owner;
1c79356b
A
1467
1468 XPR(XPR_VM_PAGE,
1469 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7
A
1470 mem->object, mem->offset,
1471 mem, 0,0);
1472
2d21ac55 1473 vm_object_lock_assert_exclusive(mem->object);
1c79356b
A
1474 assert(mem->tabled);
1475 assert(!mem->cleaning);
316670eb
A
1476 assert(!mem->laundry);
1477#if 0
1478 /*
1479 * we don't hold the page queue lock
1480 * so this check isn't safe to make
1481 */
1c79356b 1482 VM_PAGE_CHECK(mem);
316670eb 1483#endif
b0d623f7
A
1484 if (remove_from_hash == TRUE) {
1485 /*
1486 * Remove from the object_object/offset hash table
1487 */
1488 hash_id = vm_page_hash(mem->object, mem->offset);
1489 bucket = &vm_page_buckets[hash_id];
1490 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
91447636 1491
b0d623f7 1492 lck_spin_lock(bucket_lock);
1c79356b 1493
fe8ab488 1494 if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
b0d623f7 1495 /* optimize for common case */
1c79356b 1496
fe8ab488 1497 bucket->page_list = mem->next_m;
b0d623f7 1498 } else {
fe8ab488 1499 vm_page_packed_t *prev;
1c79356b 1500
fe8ab488
A
1501 for (prev = &this->next_m;
1502 (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
1503 prev = &this->next_m)
b0d623f7 1504 continue;
fe8ab488 1505 *prev = this->next_m;
b0d623f7 1506 }
1c79356b 1507#if MACH_PAGE_HASH_STATS
b0d623f7 1508 bucket->cur_count--;
1c79356b 1509#endif /* MACH_PAGE_HASH_STATS */
15129b1c 1510 mem->hashed = FALSE;
b0d623f7
A
1511 lck_spin_unlock(bucket_lock);
1512 }
1c79356b
A
1513 /*
1514 * Now remove from the object's list of backed pages.
1515 */
1516
3e170ce0 1517 vm_page_remove_internal(mem);
1c79356b
A
1518
1519 /*
1520 * And show that the object has one fewer resident
1521 * page.
1522 */
1523
b0d623f7 1524 assert(mem->object->resident_page_count > 0);
1c79356b 1525 mem->object->resident_page_count--;
6d2010ae 1526
39236c6e 1527 if (mem->object->internal) {
fe8ab488 1528#if DEBUG
39236c6e 1529 assert(vm_page_internal_count);
fe8ab488
A
1530#endif /* DEBUG */
1531
39236c6e
A
1532 OSAddAtomic(-1, &vm_page_internal_count);
1533 } else {
1534 assert(vm_page_external_count);
1535 OSAddAtomic(-1, &vm_page_external_count);
fe8ab488
A
1536
1537 if (mem->xpmapped) {
1538 assert(vm_page_xpmapped_external_count);
1539 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1540 }
39236c6e 1541 }
6d2010ae
A
1542 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1543 if (mem->object->resident_page_count == 0)
1544 vm_object_cache_remove(mem->object);
1545 }
1546
b0d623f7
A
1547 if (VM_PAGE_WIRED(mem)) {
1548 assert(mem->object->wired_page_count > 0);
1549 mem->object->wired_page_count--;
3e170ce0
A
1550 if (!mem->object->wired_page_count) {
1551 VM_OBJECT_UNWIRED(mem->object);
1552 }
b0d623f7
A
1553 }
1554 assert(mem->object->resident_page_count >=
1555 mem->object->wired_page_count);
1556 if (mem->reusable) {
1557 assert(mem->object->reusable_page_count > 0);
1558 mem->object->reusable_page_count--;
1559 assert(mem->object->reusable_page_count <=
1560 mem->object->resident_page_count);
1561 mem->reusable = FALSE;
1562 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1563 vm_page_stats_reusable.reused_remove++;
1564 } else if (mem->object->all_reusable) {
1565 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1566 vm_page_stats_reusable.reused_remove++;
1567 }
1c79356b 1568
fe8ab488
A
1569 if (mem->object->purgable == VM_PURGABLE_DENY) {
1570 owner = TASK_NULL;
1571 } else {
1572 owner = mem->object->vo_purgeable_owner;
1573 }
1574 if (owner &&
1575 (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
1576 VM_PAGE_WIRED(mem))) {
1577 /* less non-volatile bytes */
1578 ledger_debit(owner->ledger,
1579 task_ledgers.purgeable_nonvolatile,
1580 PAGE_SIZE);
1581 /* less footprint */
1582 ledger_debit(owner->ledger,
1583 task_ledgers.phys_footprint,
1584 PAGE_SIZE);
1585 } else if (owner &&
1586 (mem->object->purgable == VM_PURGABLE_VOLATILE ||
1587 mem->object->purgable == VM_PURGABLE_EMPTY)) {
1588 assert(! VM_PAGE_WIRED(mem));
1589 /* less volatile bytes */
1590 ledger_debit(owner->ledger,
1591 task_ledgers.purgeable_volatile,
1592 PAGE_SIZE);
1593 }
593a1d5f 1594 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
b0d623f7
A
1595 if (VM_PAGE_WIRED(mem)) {
1596 assert(vm_page_purgeable_wired_count > 0);
1597 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1598 } else {
1599 assert(vm_page_purgeable_count > 0);
1600 OSAddAtomic(-1, &vm_page_purgeable_count);
1601 }
91447636 1602 }
6d2010ae
A
1603 if (mem->object->set_cache_attr == TRUE)
1604 pmap_set_cache_attributes(mem->phys_page, 0);
1605
1c79356b
A
1606 mem->tabled = FALSE;
1607 mem->object = VM_OBJECT_NULL;
91447636 1608 mem->offset = (vm_object_offset_t) -1;
1c79356b
A
1609}
1610
b0d623f7 1611
1c79356b
A
1612/*
1613 * vm_page_lookup:
1614 *
1615 * Returns the page associated with the object/offset
1616 * pair specified; if none is found, VM_PAGE_NULL is returned.
1617 *
1618 * The object must be locked. No side effects.
1619 */
1620
3e170ce0
A
1621#define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
1622
1623#if DEBUG_VM_PAGE_LOOKUP
2d21ac55 1624
3e170ce0
A
1625struct {
1626 uint64_t vpl_total;
1627 uint64_t vpl_empty_obj;
1628 uint64_t vpl_bucket_NULL;
1629 uint64_t vpl_hit_hint;
1630 uint64_t vpl_hit_hint_next;
1631 uint64_t vpl_hit_hint_prev;
1632 uint64_t vpl_fast;
1633 uint64_t vpl_slow;
1634 uint64_t vpl_hit;
1635 uint64_t vpl_miss;
1636
1637 uint64_t vpl_fast_elapsed;
1638 uint64_t vpl_slow_elapsed;
1639} vm_page_lookup_stats __attribute__((aligned(8)));
1640
1641#endif
1642
1643#define KDP_VM_PAGE_WALK_MAX 1000
1644
1645vm_page_t
1646kdp_vm_page_lookup(
1647 vm_object_t object,
1648 vm_object_offset_t offset)
1649{
1650 vm_page_t cur_page;
1651 int num_traversed = 0;
1652
1653 if (not_in_kdp) {
1654 panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
1655 }
1656
1657 queue_iterate(&object->memq, cur_page, vm_page_t, listq) {
1658 if (cur_page->offset == offset) {
1659 return cur_page;
1660 }
1661 num_traversed++;
1662
1663 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
1664 return VM_PAGE_NULL;
1665 }
1666 }
1667
1668 return VM_PAGE_NULL;
1669}
91447636 1670
1c79356b
A
1671vm_page_t
1672vm_page_lookup(
b0d623f7
A
1673 vm_object_t object,
1674 vm_object_offset_t offset)
1c79356b 1675{
b0d623f7
A
1676 vm_page_t mem;
1677 vm_page_bucket_t *bucket;
1678 queue_entry_t qe;
3e170ce0 1679 lck_spin_t *bucket_lock = NULL;
b0d623f7 1680 int hash_id;
3e170ce0
A
1681#if DEBUG_VM_PAGE_LOOKUP
1682 uint64_t start, elapsed;
91447636 1683
3e170ce0
A
1684 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
1685#endif
2d21ac55 1686 vm_object_lock_assert_held(object);
3e170ce0
A
1687
1688 if (object->resident_page_count == 0) {
1689#if DEBUG_VM_PAGE_LOOKUP
1690 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
1691#endif
1692 return (VM_PAGE_NULL);
1693 }
1694
91447636 1695 mem = object->memq_hint;
2d21ac55 1696
91447636
A
1697 if (mem != VM_PAGE_NULL) {
1698 assert(mem->object == object);
2d21ac55 1699
91447636 1700 if (mem->offset == offset) {
3e170ce0
A
1701#if DEBUG_VM_PAGE_LOOKUP
1702 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
1703#endif
1704 return (mem);
91447636
A
1705 }
1706 qe = queue_next(&mem->listq);
2d21ac55 1707
91447636
A
1708 if (! queue_end(&object->memq, qe)) {
1709 vm_page_t next_page;
1710
1711 next_page = (vm_page_t) qe;
1712 assert(next_page->object == object);
2d21ac55 1713
91447636 1714 if (next_page->offset == offset) {
91447636 1715 object->memq_hint = next_page; /* new hint */
3e170ce0
A
1716#if DEBUG_VM_PAGE_LOOKUP
1717 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
1718#endif
1719 return (next_page);
91447636
A
1720 }
1721 }
1722 qe = queue_prev(&mem->listq);
2d21ac55 1723
91447636
A
1724 if (! queue_end(&object->memq, qe)) {
1725 vm_page_t prev_page;
1726
1727 prev_page = (vm_page_t) qe;
1728 assert(prev_page->object == object);
2d21ac55 1729
91447636 1730 if (prev_page->offset == offset) {
91447636 1731 object->memq_hint = prev_page; /* new hint */
3e170ce0
A
1732#if DEBUG_VM_PAGE_LOOKUP
1733 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
1734#endif
1735 return (prev_page);
91447636
A
1736 }
1737 }
1738 }
1c79356b 1739 /*
2d21ac55 1740 * Search the hash table for this object/offset pair
1c79356b 1741 */
b0d623f7
A
1742 hash_id = vm_page_hash(object, offset);
1743 bucket = &vm_page_buckets[hash_id];
1c79356b 1744
2d21ac55
A
1745 /*
1746 * since we hold the object lock, we are guaranteed that no
1747 * new pages can be inserted into this object... this in turn
1748 * guarantess that the page we're looking for can't exist
1749 * if the bucket it hashes to is currently NULL even when looked
1750 * at outside the scope of the hash bucket lock... this is a
1751 * really cheap optimiztion to avoid taking the lock
1752 */
fe8ab488 1753 if (!bucket->page_list) {
3e170ce0
A
1754#if DEBUG_VM_PAGE_LOOKUP
1755 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
1756#endif
2d21ac55
A
1757 return (VM_PAGE_NULL);
1758 }
0c530ab8 1759
3e170ce0
A
1760#if DEBUG_VM_PAGE_LOOKUP
1761 start = mach_absolute_time();
1762#endif
1763 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
316670eb 1764 /*
3e170ce0
A
1765 * on average, it's roughly 3 times faster to run a short memq list
1766 * than to take the spin lock and go through the hash list
316670eb 1767 */
3e170ce0
A
1768 mem = (vm_page_t)queue_first(&object->memq);
1769
1770 while (!queue_end(&object->memq, (queue_entry_t)mem)) {
1771
1772 if (mem->offset == offset)
1773 break;
1774
1775 mem = (vm_page_t)queue_next(&mem->listq);
1776 }
1777 if (queue_end(&object->memq, (queue_entry_t)mem))
1778 mem = NULL;
1779 } else {
1780
1781 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1782
1783 lck_spin_lock(bucket_lock);
1784
1785 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
1786#if 0
1787 /*
1788 * we don't hold the page queue lock
1789 * so this check isn't safe to make
1790 */
1791 VM_PAGE_CHECK(mem);
316670eb 1792#endif
3e170ce0
A
1793 if ((mem->object == object) && (mem->offset == offset))
1794 break;
1795 }
1796 lck_spin_unlock(bucket_lock);
1c79356b 1797 }
55e303ae 1798
3e170ce0
A
1799#if DEBUG_VM_PAGE_LOOKUP
1800 elapsed = mach_absolute_time() - start;
1801
1802 if (bucket_lock) {
1803 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
1804 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
1805 } else {
1806 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
1807 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
1808 }
1809 if (mem != VM_PAGE_NULL)
1810 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
1811 else
1812 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
1813#endif
91447636 1814 if (mem != VM_PAGE_NULL) {
91447636 1815 assert(mem->object == object);
91447636 1816
3e170ce0
A
1817 object->memq_hint = mem;
1818 }
1819 return (mem);
91447636
A
1820}
1821
1822
1c79356b
A
1823/*
1824 * vm_page_rename:
1825 *
1826 * Move the given memory entry from its
1827 * current object to the specified target object/offset.
1828 *
1829 * The object must be locked.
1830 */
1831void
1832vm_page_rename(
1833 register vm_page_t mem,
1834 register vm_object_t new_object,
2d21ac55
A
1835 vm_object_offset_t new_offset,
1836 boolean_t encrypted_ok)
1c79356b 1837{
3e170ce0
A
1838 boolean_t internal_to_external, external_to_internal;
1839 vm_tag_t tag;
39236c6e 1840
1c79356b 1841 assert(mem->object != new_object);
2d21ac55 1842
3e170ce0
A
1843 assert(mem->object);
1844
91447636
A
1845 /*
1846 * ENCRYPTED SWAP:
1847 * The encryption key is based on the page's memory object
1848 * (aka "pager") and paging offset. Moving the page to
1849 * another VM object changes its "pager" and "paging_offset"
2d21ac55
A
1850 * so it has to be decrypted first, or we would lose the key.
1851 *
1852 * One exception is VM object collapsing, where we transfer pages
1853 * from one backing object to its parent object. This operation also
1854 * transfers the paging information, so the <pager,paging_offset> info
1855 * should remain consistent. The caller (vm_object_do_collapse())
1856 * sets "encrypted_ok" in this case.
91447636 1857 */
2d21ac55 1858 if (!encrypted_ok && mem->encrypted) {
91447636
A
1859 panic("vm_page_rename: page %p is encrypted\n", mem);
1860 }
2d21ac55 1861
b0d623f7
A
1862 XPR(XPR_VM_PAGE,
1863 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1864 new_object, new_offset,
1865 mem, 0,0);
1866
1c79356b
A
1867 /*
1868 * Changes to mem->object require the page lock because
1869 * the pageout daemon uses that lock to get the object.
1870 */
b0d623f7 1871 vm_page_lockspin_queues();
1c79356b 1872
39236c6e
A
1873 internal_to_external = FALSE;
1874 external_to_internal = FALSE;
1875
1876 if (mem->local) {
1877 /*
1878 * it's much easier to get the vm_page_pageable_xxx accounting correct
1879 * if we first move the page to the active queue... it's going to end
1880 * up there anyway, and we don't do vm_page_rename's frequently enough
1881 * for this to matter.
1882 */
3e170ce0 1883 vm_page_queues_remove(mem);
39236c6e
A
1884 vm_page_activate(mem);
1885 }
1886 if (mem->active || mem->inactive || mem->speculative) {
1887 if (mem->object->internal && !new_object->internal) {
1888 internal_to_external = TRUE;
1889 }
1890 if (!mem->object->internal && new_object->internal) {
1891 external_to_internal = TRUE;
1892 }
1893 }
1894
3e170ce0 1895 tag = mem->object->wire_tag;
b0d623f7 1896 vm_page_remove(mem, TRUE);
3e170ce0 1897 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
1c79356b 1898
39236c6e
A
1899 if (internal_to_external) {
1900 vm_page_pageable_internal_count--;
1901 vm_page_pageable_external_count++;
1902 } else if (external_to_internal) {
1903 vm_page_pageable_external_count--;
1904 vm_page_pageable_internal_count++;
1905 }
1906
1c79356b
A
1907 vm_page_unlock_queues();
1908}
1909
1910/*
1911 * vm_page_init:
1912 *
1913 * Initialize the fields in a new page.
1914 * This takes a structure with random values and initializes it
1915 * so that it can be given to vm_page_release or vm_page_insert.
1916 */
1917void
1918vm_page_init(
1919 vm_page_t mem,
0b4c1975
A
1920 ppnum_t phys_page,
1921 boolean_t lopage)
1c79356b 1922{
91447636 1923 assert(phys_page);
7ddcb079
A
1924
1925#if DEBUG
1926 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1927 if (!(pmap_valid_page(phys_page))) {
1928 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1929 }
1930 }
1931#endif
1c79356b 1932 *mem = vm_page_template;
55e303ae 1933 mem->phys_page = phys_page;
6d2010ae
A
1934#if 0
1935 /*
1936 * we're leaving this turned off for now... currently pages
1937 * come off the free list and are either immediately dirtied/referenced
1938 * due to zero-fill or COW faults, or are used to read or write files...
1939 * in the file I/O case, the UPL mechanism takes care of clearing
1940 * the state of the HW ref/mod bits in a somewhat fragile way.
1941 * Since we may change the way this works in the future (to toughen it up),
1942 * I'm leaving this as a reminder of where these bits could get cleared
1943 */
1944
1945 /*
1946 * make sure both the h/w referenced and modified bits are
1947 * clear at this point... we are especially dependent on
1948 * not finding a 'stale' h/w modified in a number of spots
1949 * once this page goes back into use
1950 */
1951 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1952#endif
0b4c1975 1953 mem->lopage = lopage;
1c79356b
A
1954}
1955
1956/*
1957 * vm_page_grab_fictitious:
1958 *
1959 * Remove a fictitious page from the free list.
1960 * Returns VM_PAGE_NULL if there are no free pages.
1961 */
1962int c_vm_page_grab_fictitious = 0;
6d2010ae 1963int c_vm_page_grab_fictitious_failed = 0;
1c79356b
A
1964int c_vm_page_release_fictitious = 0;
1965int c_vm_page_more_fictitious = 0;
1966
1967vm_page_t
2d21ac55 1968vm_page_grab_fictitious_common(
b0d623f7 1969 ppnum_t phys_addr)
1c79356b 1970{
6d2010ae
A
1971 vm_page_t m;
1972
1973 if ((m = (vm_page_t)zget(vm_page_zone))) {
1c79356b 1974
0b4c1975 1975 vm_page_init(m, phys_addr, FALSE);
1c79356b 1976 m->fictitious = TRUE;
1c79356b 1977
6d2010ae
A
1978 c_vm_page_grab_fictitious++;
1979 } else
1980 c_vm_page_grab_fictitious_failed++;
1981
1c79356b
A
1982 return m;
1983}
1984
2d21ac55
A
1985vm_page_t
1986vm_page_grab_fictitious(void)
1987{
1988 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1989}
1990
1991vm_page_t
1992vm_page_grab_guard(void)
1993{
1994 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1995}
1996
6d2010ae 1997
1c79356b
A
1998/*
1999 * vm_page_release_fictitious:
2000 *
6d2010ae 2001 * Release a fictitious page to the zone pool
1c79356b 2002 */
1c79356b
A
2003void
2004vm_page_release_fictitious(
6d2010ae 2005 vm_page_t m)
1c79356b
A
2006{
2007 assert(!m->free);
1c79356b 2008 assert(m->fictitious);
2d21ac55
A
2009 assert(m->phys_page == vm_page_fictitious_addr ||
2010 m->phys_page == vm_page_guard_addr);
1c79356b
A
2011
2012 c_vm_page_release_fictitious++;
6d2010ae 2013
91447636 2014 zfree(vm_page_zone, m);
1c79356b
A
2015}
2016
2017/*
2018 * vm_page_more_fictitious:
2019 *
6d2010ae 2020 * Add more fictitious pages to the zone.
1c79356b
A
2021 * Allowed to block. This routine is way intimate
2022 * with the zones code, for several reasons:
2023 * 1. we need to carve some page structures out of physical
2024 * memory before zones work, so they _cannot_ come from
2025 * the zone_map.
2026 * 2. the zone needs to be collectable in order to prevent
2027 * growth without bound. These structures are used by
2028 * the device pager (by the hundreds and thousands), as
2029 * private pages for pageout, and as blocking pages for
2030 * pagein. Temporary bursts in demand should not result in
2031 * permanent allocation of a resource.
2032 * 3. To smooth allocation humps, we allocate single pages
2033 * with kernel_memory_allocate(), and cram them into the
6d2010ae 2034 * zone.
1c79356b
A
2035 */
2036
2037void vm_page_more_fictitious(void)
2038{
6d2010ae
A
2039 vm_offset_t addr;
2040 kern_return_t retval;
1c79356b
A
2041
2042 c_vm_page_more_fictitious++;
2043
1c79356b
A
2044 /*
2045 * Allocate a single page from the zone_map. Do not wait if no physical
2046 * pages are immediately available, and do not zero the space. We need
2047 * our own blocking lock here to prevent having multiple,
2048 * simultaneous requests from piling up on the zone_map lock. Exactly
2049 * one (of our) threads should be potentially waiting on the map lock.
2050 * If winner is not vm-privileged, then the page allocation will fail,
2051 * and it will temporarily block here in the vm_page_wait().
2052 */
b0d623f7 2053 lck_mtx_lock(&vm_page_alloc_lock);
1c79356b
A
2054 /*
2055 * If another thread allocated space, just bail out now.
2056 */
2057 if (zone_free_count(vm_page_zone) > 5) {
2058 /*
2059 * The number "5" is a small number that is larger than the
2060 * number of fictitious pages that any single caller will
2061 * attempt to allocate. Otherwise, a thread will attempt to
2062 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2063 * release all of the resources and locks already acquired,
2064 * and then call this routine. This routine finds the pages
2065 * that the caller released, so fails to allocate new space.
2066 * The process repeats infinitely. The largest known number
2067 * of fictitious pages required in this manner is 2. 5 is
2068 * simply a somewhat larger number.
2069 */
b0d623f7 2070 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
2071 return;
2072 }
2073
91447636
A
2074 retval = kernel_memory_allocate(zone_map,
2075 &addr, PAGE_SIZE, VM_PROT_ALL,
3e170ce0 2076 KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
91447636 2077 if (retval != KERN_SUCCESS) {
1c79356b 2078 /*
6d2010ae 2079 * No page was available. Drop the
1c79356b
A
2080 * lock to give another thread a chance at it, and
2081 * wait for the pageout daemon to make progress.
2082 */
b0d623f7 2083 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
2084 vm_page_wait(THREAD_UNINT);
2085 return;
2086 }
39236c6e 2087
7ddcb079 2088 zcram(vm_page_zone, addr, PAGE_SIZE);
6d2010ae 2089
b0d623f7 2090 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
2091}
2092
1c79356b
A
2093
2094/*
2095 * vm_pool_low():
2096 *
2097 * Return true if it is not likely that a non-vm_privileged thread
2098 * can get memory without blocking. Advisory only, since the
2099 * situation may change under us.
2100 */
2101int
2102vm_pool_low(void)
2103{
2104 /* No locking, at worst we will fib. */
b0d623f7 2105 return( vm_page_free_count <= vm_page_free_reserved );
1c79356b
A
2106}
2107
0c530ab8
A
2108
2109
2110/*
2111 * this is an interface to support bring-up of drivers
2112 * on platforms with physical memory > 4G...
2113 */
fe8ab488 2114int vm_himemory_mode = 2;
0c530ab8
A
2115
2116
2117/*
2118 * this interface exists to support hardware controllers
2119 * incapable of generating DMAs with more than 32 bits
2120 * of address on platforms with physical memory > 4G...
2121 */
0b4c1975
A
2122unsigned int vm_lopages_allocated_q = 0;
2123unsigned int vm_lopages_allocated_cpm_success = 0;
2124unsigned int vm_lopages_allocated_cpm_failed = 0;
2d21ac55 2125queue_head_t vm_lopage_queue_free;
0c530ab8
A
2126
2127vm_page_t
2128vm_page_grablo(void)
2129{
0b4c1975 2130 vm_page_t mem;
0c530ab8 2131
0b4c1975 2132 if (vm_lopage_needed == FALSE)
0c530ab8
A
2133 return (vm_page_grab());
2134
b0d623f7 2135 lck_mtx_lock_spin(&vm_page_queue_free_lock);
0c530ab8 2136
0b4c1975
A
2137 if ( !queue_empty(&vm_lopage_queue_free)) {
2138 queue_remove_first(&vm_lopage_queue_free,
2139 mem,
2140 vm_page_t,
2141 pageq);
2142 assert(vm_lopage_free_count);
0c530ab8 2143
0b4c1975
A
2144 vm_lopage_free_count--;
2145 vm_lopages_allocated_q++;
2146
2147 if (vm_lopage_free_count < vm_lopage_lowater)
2148 vm_lopage_refill = TRUE;
0c530ab8 2149
0b4c1975 2150 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 2151 } else {
0b4c1975
A
2152 lck_mtx_unlock(&vm_page_queue_free_lock);
2153
2154 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
2155
2156 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2157 vm_lopages_allocated_cpm_failed++;
2158 lck_mtx_unlock(&vm_page_queue_free_lock);
2159
2160 return (VM_PAGE_NULL);
2161 }
2162 mem->busy = TRUE;
2163
2164 vm_page_lockspin_queues();
2165
2166 mem->gobbled = FALSE;
2167 vm_page_gobble_count--;
2168 vm_page_wire_count--;
2169
2170 vm_lopages_allocated_cpm_success++;
2171 vm_page_unlock_queues();
0c530ab8 2172 }
0b4c1975
A
2173 assert(mem->busy);
2174 assert(!mem->free);
2175 assert(!mem->pmapped);
2176 assert(!mem->wpmapped);
7ddcb079 2177 assert(!pmap_is_noencrypt(mem->phys_page));
0b4c1975
A
2178
2179 mem->pageq.next = NULL;
2180 mem->pageq.prev = NULL;
0c530ab8
A
2181
2182 return (mem);
2183}
2184
6d2010ae 2185
1c79356b
A
2186/*
2187 * vm_page_grab:
2188 *
2d21ac55
A
2189 * first try to grab a page from the per-cpu free list...
2190 * this must be done while pre-emption is disabled... if
2191 * a page is available, we're done...
2192 * if no page is available, grab the vm_page_queue_free_lock
2193 * and see if current number of free pages would allow us
2194 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2195 * if there are pages available, disable preemption and
2196 * recheck the state of the per-cpu free list... we could
2197 * have been preempted and moved to a different cpu, or
2198 * some other thread could have re-filled it... if still
2199 * empty, figure out how many pages we can steal from the
2200 * global free queue and move to the per-cpu queue...
2201 * return 1 of these pages when done... only wakeup the
2202 * pageout_scan thread if we moved pages from the global
2203 * list... no need for the wakeup if we've satisfied the
2204 * request from the per-cpu queue.
1c79356b
A
2205 */
2206
1c79356b
A
2207
2208vm_page_t
2d21ac55 2209vm_page_grab( void )
1c79356b 2210{
2d21ac55
A
2211 vm_page_t mem;
2212
2213
2214 disable_preemption();
2215
2216 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2217return_page_from_cpu_list:
2218 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2219 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
2d21ac55
A
2220
2221 enable_preemption();
fe8ab488 2222 mem->pageq.next = NULL;
2d21ac55
A
2223
2224 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2225 assert(mem->tabled == FALSE);
2226 assert(mem->object == VM_OBJECT_NULL);
2227 assert(!mem->laundry);
2228 assert(!mem->free);
2229 assert(pmap_verify_free(mem->phys_page));
2230 assert(mem->busy);
2231 assert(!mem->encrypted);
2232 assert(!mem->pmapped);
4a3eedf9 2233 assert(!mem->wpmapped);
6d2010ae
A
2234 assert(!mem->active);
2235 assert(!mem->inactive);
2236 assert(!mem->throttled);
2237 assert(!mem->speculative);
7ddcb079 2238 assert(!pmap_is_noencrypt(mem->phys_page));
2d21ac55
A
2239
2240 return mem;
2241 }
2242 enable_preemption();
2243
1c79356b 2244
1c79356b
A
2245 /*
2246 * Optionally produce warnings if the wire or gobble
2247 * counts exceed some threshold.
2248 */
fe8ab488
A
2249#if VM_PAGE_WIRE_COUNT_WARNING
2250 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
1c79356b
A
2251 printf("mk: vm_page_grab(): high wired page count of %d\n",
2252 vm_page_wire_count);
1c79356b 2253 }
fe8ab488
A
2254#endif
2255#if VM_PAGE_GOBBLE_COUNT_WARNING
2256 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
1c79356b
A
2257 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2258 vm_page_gobble_count);
1c79356b 2259 }
fe8ab488 2260#endif
b0d623f7
A
2261 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2262
1c79356b
A
2263 /*
2264 * Only let privileged threads (involved in pageout)
2265 * dip into the reserved pool.
2266 */
1c79356b 2267 if ((vm_page_free_count < vm_page_free_reserved) &&
91447636 2268 !(current_thread()->options & TH_OPT_VMPRIV)) {
b0d623f7 2269 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 2270 mem = VM_PAGE_NULL;
1c79356b 2271 }
2d21ac55
A
2272 else {
2273 vm_page_t head;
2274 vm_page_t tail;
2275 unsigned int pages_to_steal;
2276 unsigned int color;
1c79356b 2277
2d21ac55 2278 while ( vm_page_free_count == 0 ) {
1c79356b 2279
b0d623f7 2280 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2281 /*
2282 * must be a privileged thread to be
2283 * in this state since a non-privileged
2284 * thread would have bailed if we were
2285 * under the vm_page_free_reserved mark
2286 */
2287 VM_PAGE_WAIT();
b0d623f7 2288 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
2289 }
2290
2291 disable_preemption();
2292
2293 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
b0d623f7 2294 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2295
2296 /*
2297 * we got preempted and moved to another processor
2298 * or we got preempted and someone else ran and filled the cache
2299 */
2300 goto return_page_from_cpu_list;
2301 }
2302 if (vm_page_free_count <= vm_page_free_reserved)
2303 pages_to_steal = 1;
2304 else {
fe8ab488
A
2305 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2306 pages_to_steal = vm_free_magazine_refill_limit;
2307 else
2d21ac55
A
2308 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2309 }
2310 color = PROCESSOR_DATA(current_processor(), start_color);
2311 head = tail = NULL;
2312
fe8ab488
A
2313 vm_page_free_count -= pages_to_steal;
2314
2d21ac55 2315 while (pages_to_steal--) {
2d21ac55
A
2316
2317 while (queue_empty(&vm_page_queue_free[color]))
2318 color = (color + 1) & vm_color_mask;
2319
2320 queue_remove_first(&vm_page_queue_free[color],
2321 mem,
2322 vm_page_t,
2323 pageq);
2324 mem->pageq.next = NULL;
2325 mem->pageq.prev = NULL;
2326
6d2010ae
A
2327 assert(!mem->active);
2328 assert(!mem->inactive);
2329 assert(!mem->throttled);
2330 assert(!mem->speculative);
2331
2d21ac55
A
2332 color = (color + 1) & vm_color_mask;
2333
2334 if (head == NULL)
2335 head = mem;
2336 else
2337 tail->pageq.next = (queue_t)mem;
2338 tail = mem;
2339
2d21ac55
A
2340 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2341 assert(mem->tabled == FALSE);
2342 assert(mem->object == VM_OBJECT_NULL);
2343 assert(!mem->laundry);
2344 assert(mem->free);
2345 mem->free = FALSE;
2346
2347 assert(pmap_verify_free(mem->phys_page));
2348 assert(mem->busy);
2349 assert(!mem->free);
2350 assert(!mem->encrypted);
2351 assert(!mem->pmapped);
4a3eedf9 2352 assert(!mem->wpmapped);
7ddcb079 2353 assert(!pmap_is_noencrypt(mem->phys_page));
2d21ac55 2354 }
fe8ab488
A
2355 lck_mtx_unlock(&vm_page_queue_free_lock);
2356
2d21ac55
A
2357 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2358 PROCESSOR_DATA(current_processor(), start_color) = color;
2359
2360 /*
2361 * satisfy this request
2362 */
2363 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2364 mem = head;
2365 mem->pageq.next = NULL;
91447636 2366
2d21ac55
A
2367 enable_preemption();
2368 }
1c79356b
A
2369 /*
2370 * Decide if we should poke the pageout daemon.
2371 * We do this if the free count is less than the low
2372 * water mark, or if the free count is less than the high
2373 * water mark (but above the low water mark) and the inactive
2374 * count is less than its target.
2375 *
2376 * We don't have the counts locked ... if they change a little,
2377 * it doesn't really matter.
2378 */
1c79356b 2379 if ((vm_page_free_count < vm_page_free_min) ||
316670eb
A
2380 ((vm_page_free_count < vm_page_free_target) &&
2381 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2382 thread_wakeup((event_t) &vm_page_free_wanted);
2d21ac55 2383
6d2010ae
A
2384 VM_CHECK_MEMORYSTATUS;
2385
55e303ae 2386// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1c79356b
A
2387
2388 return mem;
2389}
2390
2391/*
2392 * vm_page_release:
2393 *
2394 * Return a page to the free list.
2395 */
2396
2397void
2398vm_page_release(
2399 register vm_page_t mem)
2400{
2d21ac55 2401 unsigned int color;
b0d623f7
A
2402 int need_wakeup = 0;
2403 int need_priv_wakeup = 0;
55e303ae 2404
6d2010ae 2405
1c79356b 2406 assert(!mem->private && !mem->fictitious);
b0d623f7
A
2407 if (vm_page_free_verify) {
2408 assert(pmap_verify_free(mem->phys_page));
2409 }
55e303ae 2410// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1c79356b 2411
7ddcb079
A
2412 pmap_clear_noencrypt(mem->phys_page);
2413
b0d623f7 2414 lck_mtx_lock_spin(&vm_page_queue_free_lock);
91447636 2415#if DEBUG
1c79356b
A
2416 if (mem->free)
2417 panic("vm_page_release");
91447636 2418#endif
6d2010ae 2419
2d21ac55 2420 assert(mem->busy);
91447636
A
2421 assert(!mem->laundry);
2422 assert(mem->object == VM_OBJECT_NULL);
2423 assert(mem->pageq.next == NULL &&
2424 mem->pageq.prev == NULL);
2d21ac55
A
2425 assert(mem->listq.next == NULL &&
2426 mem->listq.prev == NULL);
2427
6d2010ae 2428 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
0b4c1975
A
2429 vm_lopage_free_count < vm_lopage_free_limit &&
2430 mem->phys_page < max_valid_low_ppnum) {
0c530ab8
A
2431 /*
2432 * this exists to support hardware controllers
2433 * incapable of generating DMAs with more than 32 bits
2434 * of address on platforms with physical memory > 4G...
2435 */
2d21ac55
A
2436 queue_enter_first(&vm_lopage_queue_free,
2437 mem,
2438 vm_page_t,
2439 pageq);
0c530ab8 2440 vm_lopage_free_count++;
0b4c1975
A
2441
2442 if (vm_lopage_free_count >= vm_lopage_free_limit)
2443 vm_lopage_refill = FALSE;
2444
2445 mem->lopage = TRUE;
0c530ab8 2446 } else {
6d2010ae 2447 mem->lopage = FALSE;
0b4c1975
A
2448 mem->free = TRUE;
2449
2d21ac55
A
2450 color = mem->phys_page & vm_color_mask;
2451 queue_enter_first(&vm_page_queue_free[color],
2452 mem,
2453 vm_page_t,
2454 pageq);
0c530ab8
A
2455 vm_page_free_count++;
2456 /*
2457 * Check if we should wake up someone waiting for page.
2458 * But don't bother waking them unless they can allocate.
2459 *
2460 * We wakeup only one thread, to prevent starvation.
2461 * Because the scheduling system handles wait queues FIFO,
2462 * if we wakeup all waiting threads, one greedy thread
2463 * can starve multiple niceguy threads. When the threads
2464 * all wakeup, the greedy threads runs first, grabs the page,
2465 * and waits for another page. It will be the first to run
2466 * when the next page is freed.
2467 *
2468 * However, there is a slight danger here.
2469 * The thread we wake might not use the free page.
2470 * Then the other threads could wait indefinitely
2471 * while the page goes unused. To forestall this,
2472 * the pageout daemon will keep making free pages
2473 * as long as vm_page_free_wanted is non-zero.
2474 */
1c79356b 2475
b0d623f7
A
2476 assert(vm_page_free_count > 0);
2477 if (vm_page_free_wanted_privileged > 0) {
2d21ac55 2478 vm_page_free_wanted_privileged--;
b0d623f7
A
2479 need_priv_wakeup = 1;
2480 } else if (vm_page_free_wanted > 0 &&
2481 vm_page_free_count > vm_page_free_reserved) {
0c530ab8 2482 vm_page_free_wanted--;
b0d623f7 2483 need_wakeup = 1;
0c530ab8 2484 }
1c79356b 2485 }
b0d623f7
A
2486 lck_mtx_unlock(&vm_page_queue_free_lock);
2487
2488 if (need_priv_wakeup)
2489 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2490 else if (need_wakeup)
2491 thread_wakeup_one((event_t) &vm_page_free_count);
2d21ac55 2492
6d2010ae 2493 VM_CHECK_MEMORYSTATUS;
1c79356b
A
2494}
2495
fe8ab488
A
2496/*
2497 * This version of vm_page_release() is used only at startup
2498 * when we are single-threaded and pages are being released
2499 * for the first time. Hence, no locking or unnecessary checks are made.
2500 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
2501 */
2502void
2503vm_page_release_startup(
2504 register vm_page_t mem)
2505{
2506 queue_t queue_free;
2507
2508 if (vm_lopage_free_count < vm_lopage_free_limit &&
2509 mem->phys_page < max_valid_low_ppnum) {
2510 mem->lopage = TRUE;
2511 vm_lopage_free_count++;
2512 queue_free = &vm_lopage_queue_free;
2513 } else {
2514 mem->lopage = FALSE;
2515 mem->free = TRUE;
2516 vm_page_free_count++;
2517 queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
2518 }
2519 queue_enter_first(queue_free, mem, vm_page_t, pageq);
2520}
2521
1c79356b
A
2522/*
2523 * vm_page_wait:
2524 *
2525 * Wait for a page to become available.
2526 * If there are plenty of free pages, then we don't sleep.
2527 *
2528 * Returns:
2529 * TRUE: There may be another page, try again
2530 * FALSE: We were interrupted out of our wait, don't try again
2531 */
2532
2533boolean_t
2534vm_page_wait(
2535 int interruptible )
2536{
2537 /*
2538 * We can't use vm_page_free_reserved to make this
2539 * determination. Consider: some thread might
2540 * need to allocate two pages. The first allocation
2541 * succeeds, the second fails. After the first page is freed,
2542 * a call to vm_page_wait must really block.
2543 */
9bccf70c 2544 kern_return_t wait_result;
9bccf70c 2545 int need_wakeup = 0;
2d21ac55 2546 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
1c79356b 2547
b0d623f7 2548 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
2549
2550 if (is_privileged && vm_page_free_count) {
b0d623f7 2551 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2552 return TRUE;
2553 }
1c79356b 2554 if (vm_page_free_count < vm_page_free_target) {
2d21ac55
A
2555
2556 if (is_privileged) {
2557 if (vm_page_free_wanted_privileged++ == 0)
2558 need_wakeup = 1;
2559 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2560 } else {
2561 if (vm_page_free_wanted++ == 0)
2562 need_wakeup = 1;
2563 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2564 }
b0d623f7 2565 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 2566 counter(c_vm_page_wait_block++);
0b4e3aa0
A
2567
2568 if (need_wakeup)
2569 thread_wakeup((event_t)&vm_page_free_wanted);
9bccf70c 2570
39236c6e
A
2571 if (wait_result == THREAD_WAITING) {
2572 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2573 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
9bccf70c 2574 wait_result = thread_block(THREAD_CONTINUE_NULL);
39236c6e
A
2575 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2576 }
9bccf70c 2577
1c79356b
A
2578 return(wait_result == THREAD_AWAKENED);
2579 } else {
b0d623f7 2580 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b
A
2581 return TRUE;
2582 }
2583}
2584
2585/*
2586 * vm_page_alloc:
2587 *
2588 * Allocate and return a memory cell associated
2589 * with this VM object/offset pair.
2590 *
2591 * Object must be locked.
2592 */
2593
2594vm_page_t
2595vm_page_alloc(
2596 vm_object_t object,
2597 vm_object_offset_t offset)
2598{
2599 register vm_page_t mem;
2600
2d21ac55 2601 vm_object_lock_assert_exclusive(object);
1c79356b
A
2602 mem = vm_page_grab();
2603 if (mem == VM_PAGE_NULL)
2604 return VM_PAGE_NULL;
2605
2606 vm_page_insert(mem, object, offset);
2607
2608 return(mem);
2609}
2610
2d21ac55
A
2611/*
2612 * vm_page_alloc_guard:
2613 *
b0d623f7 2614 * Allocate a fictitious page which will be used
2d21ac55
A
2615 * as a guard page. The page will be inserted into
2616 * the object and returned to the caller.
2617 */
2618
2619vm_page_t
2620vm_page_alloc_guard(
2621 vm_object_t object,
2622 vm_object_offset_t offset)
2623{
2624 register vm_page_t mem;
2625
2626 vm_object_lock_assert_exclusive(object);
2627 mem = vm_page_grab_guard();
2628 if (mem == VM_PAGE_NULL)
2629 return VM_PAGE_NULL;
2630
2631 vm_page_insert(mem, object, offset);
2632
2633 return(mem);
2634}
2635
2636
1c79356b
A
2637counter(unsigned int c_laundry_pages_freed = 0;)
2638
1c79356b 2639/*
6d2010ae 2640 * vm_page_free_prepare:
1c79356b 2641 *
6d2010ae
A
2642 * Removes page from any queue it may be on
2643 * and disassociates it from its VM object.
1c79356b
A
2644 *
2645 * Object and page queues must be locked prior to entry.
2646 */
b0d623f7 2647static void
2d21ac55 2648vm_page_free_prepare(
6d2010ae 2649 vm_page_t mem)
b0d623f7
A
2650{
2651 vm_page_free_prepare_queues(mem);
2652 vm_page_free_prepare_object(mem, TRUE);
2653}
2654
2655
2656void
2657vm_page_free_prepare_queues(
2658 vm_page_t mem)
1c79356b 2659{
2d21ac55 2660 VM_PAGE_CHECK(mem);
1c79356b
A
2661 assert(!mem->free);
2662 assert(!mem->cleaning);
fe8ab488
A
2663
2664#if MACH_ASSERT || DEBUG
b0d623f7 2665 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2666 if (mem->free)
b0d623f7 2667 panic("vm_page_free: freeing page on free list\n");
fe8ab488 2668#endif /* MACH_ASSERT || DEBUG */
b0d623f7
A
2669 if (mem->object) {
2670 vm_object_lock_assert_exclusive(mem->object);
2671 }
2d21ac55
A
2672 if (mem->laundry) {
2673 /*
2674 * We may have to free a page while it's being laundered
2675 * if we lost its pager (due to a forced unmount, for example).
316670eb
A
2676 * We need to call vm_pageout_steal_laundry() before removing
2677 * the page from its VM object, so that we can remove it
2678 * from its pageout queue and adjust the laundry accounting
2d21ac55 2679 */
316670eb 2680 vm_pageout_steal_laundry(mem, TRUE);
2d21ac55
A
2681 counter(++c_laundry_pages_freed);
2682 }
39236c6e 2683
3e170ce0 2684 vm_page_queues_remove(mem); /* clears local/active/inactive/throttled/speculative */
b0d623f7
A
2685
2686 if (VM_PAGE_WIRED(mem)) {
2687 if (mem->object) {
2688 assert(mem->object->wired_page_count > 0);
2689 mem->object->wired_page_count--;
3e170ce0
A
2690 if (!mem->object->wired_page_count) {
2691 VM_OBJECT_UNWIRED(mem->object);
2692 }
2693
b0d623f7
A
2694 assert(mem->object->resident_page_count >=
2695 mem->object->wired_page_count);
6d2010ae
A
2696
2697 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2698 OSAddAtomic(+1, &vm_page_purgeable_count);
2699 assert(vm_page_purgeable_wired_count > 0);
2700 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2701 }
fe8ab488
A
2702 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2703 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2704 mem->object->vo_purgeable_owner != TASK_NULL) {
2705 task_t owner;
2706
2707 owner = mem->object->vo_purgeable_owner;
2708 /*
2709 * While wired, this page was accounted
2710 * as "non-volatile" but it should now
2711 * be accounted as "volatile".
2712 */
2713 /* one less "non-volatile"... */
2714 ledger_debit(owner->ledger,
2715 task_ledgers.purgeable_nonvolatile,
2716 PAGE_SIZE);
2717 /* ... and "phys_footprint" */
2718 ledger_debit(owner->ledger,
2719 task_ledgers.phys_footprint,
2720 PAGE_SIZE);
2721 /* one more "volatile" */
2722 ledger_credit(owner->ledger,
2723 task_ledgers.purgeable_volatile,
2724 PAGE_SIZE);
2725 }
b0d623f7 2726 }
1c79356b
A
2727 if (!mem->private && !mem->fictitious)
2728 vm_page_wire_count--;
2729 mem->wire_count = 0;
2730 assert(!mem->gobbled);
2731 } else if (mem->gobbled) {
2732 if (!mem->private && !mem->fictitious)
2733 vm_page_wire_count--;
2734 vm_page_gobble_count--;
2735 }
b0d623f7
A
2736}
2737
2738
2739void
2740vm_page_free_prepare_object(
2741 vm_page_t mem,
2742 boolean_t remove_from_hash)
2743{
b0d623f7
A
2744 if (mem->tabled)
2745 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
1c79356b 2746
b0d623f7 2747 PAGE_WAKEUP(mem); /* clears wanted */
1c79356b
A
2748
2749 if (mem->private) {
2750 mem->private = FALSE;
2751 mem->fictitious = TRUE;
55e303ae 2752 mem->phys_page = vm_page_fictitious_addr;
1c79356b 2753 }
6d2010ae 2754 if ( !mem->fictitious) {
0b4c1975 2755 vm_page_init(mem, mem->phys_page, mem->lopage);
1c79356b
A
2756 }
2757}
2758
b0d623f7 2759
6d2010ae
A
2760/*
2761 * vm_page_free:
2762 *
2763 * Returns the given page to the free list,
2764 * disassociating it with any VM object.
2765 *
2766 * Object and page queues must be locked prior to entry.
2767 */
2d21ac55
A
2768void
2769vm_page_free(
2770 vm_page_t mem)
2771{
b0d623f7 2772 vm_page_free_prepare(mem);
6d2010ae 2773
b0d623f7
A
2774 if (mem->fictitious) {
2775 vm_page_release_fictitious(mem);
2776 } else {
2777 vm_page_release(mem);
2778 }
2779}
2780
2781
2782void
2783vm_page_free_unlocked(
2784 vm_page_t mem,
2785 boolean_t remove_from_hash)
2786{
2787 vm_page_lockspin_queues();
2788 vm_page_free_prepare_queues(mem);
2789 vm_page_unlock_queues();
2790
2791 vm_page_free_prepare_object(mem, remove_from_hash);
2792
2d21ac55
A
2793 if (mem->fictitious) {
2794 vm_page_release_fictitious(mem);
2795 } else {
2796 vm_page_release(mem);
2797 }
2798}
55e303ae 2799
316670eb 2800
2d21ac55
A
2801/*
2802 * Free a list of pages. The list can be up to several hundred pages,
2803 * as blocked up by vm_pageout_scan().
b0d623f7 2804 * The big win is not having to take the free list lock once
316670eb 2805 * per page.
2d21ac55 2806 */
55e303ae
A
2807void
2808vm_page_free_list(
316670eb 2809 vm_page_t freeq,
b0d623f7 2810 boolean_t prepare_object)
55e303ae 2811{
316670eb 2812 vm_page_t mem;
2d21ac55 2813 vm_page_t nxt;
316670eb
A
2814 vm_page_t local_freeq;
2815 int pg_count;
2d21ac55 2816
316670eb 2817 while (freeq) {
55e303ae 2818
316670eb
A
2819 pg_count = 0;
2820 local_freeq = VM_PAGE_NULL;
2821 mem = freeq;
b0d623f7 2822
316670eb
A
2823 /*
2824 * break up the processing into smaller chunks so
2825 * that we can 'pipeline' the pages onto the
2826 * free list w/o introducing too much
2827 * contention on the global free queue lock
2828 */
2829 while (mem && pg_count < 64) {
2830
2831 assert(!mem->inactive);
2832 assert(!mem->active);
2833 assert(!mem->throttled);
2834 assert(!mem->free);
2835 assert(!mem->speculative);
2836 assert(!VM_PAGE_WIRED(mem));
2837 assert(mem->pageq.prev == NULL);
2838
2839 nxt = (vm_page_t)(mem->pageq.next);
b0d623f7 2840
316670eb
A
2841 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2842 assert(pmap_verify_free(mem->phys_page));
2843 }
2844 if (prepare_object == TRUE)
2845 vm_page_free_prepare_object(mem, TRUE);
b0d623f7 2846
316670eb
A
2847 if (!mem->fictitious) {
2848 assert(mem->busy);
55e303ae 2849
316670eb
A
2850 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2851 vm_lopage_free_count < vm_lopage_free_limit &&
2852 mem->phys_page < max_valid_low_ppnum) {
2853 mem->pageq.next = NULL;
2854 vm_page_release(mem);
2855 } else {
2856 /*
2857 * IMPORTANT: we can't set the page "free" here
2858 * because that would make the page eligible for
2859 * a physically-contiguous allocation (see
2860 * vm_page_find_contiguous()) right away (we don't
2861 * hold the vm_page_queue_free lock). That would
2862 * cause trouble because the page is not actually
2863 * in the free queue yet...
2864 */
2865 mem->pageq.next = (queue_entry_t)local_freeq;
2866 local_freeq = mem;
2867 pg_count++;
935ed37a 2868
316670eb 2869 pmap_clear_noencrypt(mem->phys_page);
935ed37a 2870 }
316670eb
A
2871 } else {
2872 assert(mem->phys_page == vm_page_fictitious_addr ||
2873 mem->phys_page == vm_page_guard_addr);
2874 vm_page_release_fictitious(mem);
2d21ac55 2875 }
316670eb 2876 mem = nxt;
55e303ae 2877 }
316670eb
A
2878 freeq = mem;
2879
2880 if ( (mem = local_freeq) ) {
2881 unsigned int avail_free_count;
2882 unsigned int need_wakeup = 0;
2883 unsigned int need_priv_wakeup = 0;
2d21ac55 2884
316670eb 2885 lck_mtx_lock_spin(&vm_page_queue_free_lock);
55e303ae 2886
316670eb
A
2887 while (mem) {
2888 int color;
2889
2890 nxt = (vm_page_t)(mem->pageq.next);
2d21ac55 2891
b0d623f7
A
2892 assert(!mem->free);
2893 assert(mem->busy);
2894 mem->free = TRUE;
b0d623f7 2895
316670eb
A
2896 color = mem->phys_page & vm_color_mask;
2897 queue_enter_first(&vm_page_queue_free[color],
2898 mem,
2899 vm_page_t,
2900 pageq);
2901 mem = nxt;
2d21ac55 2902 }
316670eb
A
2903 vm_page_free_count += pg_count;
2904 avail_free_count = vm_page_free_count;
2905
2906 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2907
2908 if (avail_free_count < vm_page_free_wanted_privileged) {
2909 need_priv_wakeup = avail_free_count;
2910 vm_page_free_wanted_privileged -= avail_free_count;
2911 avail_free_count = 0;
2912 } else {
2913 need_priv_wakeup = vm_page_free_wanted_privileged;
2914 vm_page_free_wanted_privileged = 0;
2915 avail_free_count -= vm_page_free_wanted_privileged;
2916 }
b0d623f7 2917 }
316670eb
A
2918 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2919 unsigned int available_pages;
55e303ae 2920
316670eb 2921 available_pages = avail_free_count - vm_page_free_reserved;
55e303ae 2922
316670eb
A
2923 if (available_pages >= vm_page_free_wanted) {
2924 need_wakeup = vm_page_free_wanted;
2925 vm_page_free_wanted = 0;
2926 } else {
2927 need_wakeup = available_pages;
2928 vm_page_free_wanted -= available_pages;
2929 }
2930 }
2931 lck_mtx_unlock(&vm_page_queue_free_lock);
55e303ae 2932
316670eb
A
2933 if (need_priv_wakeup != 0) {
2934 /*
2935 * There shouldn't be that many VM-privileged threads,
2936 * so let's wake them all up, even if we don't quite
2937 * have enough pages to satisfy them all.
2938 */
2939 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2940 }
2941 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2942 /*
2943 * We don't expect to have any more waiters
2944 * after this, so let's wake them all up at
2945 * once.
2946 */
2947 thread_wakeup((event_t) &vm_page_free_count);
2948 } else for (; need_wakeup != 0; need_wakeup--) {
2949 /*
2950 * Wake up one waiter per page we just released.
2951 */
2952 thread_wakeup_one((event_t) &vm_page_free_count);
55e303ae 2953 }
2d21ac55 2954
316670eb 2955 VM_CHECK_MEMORYSTATUS;
b0d623f7 2956 }
55e303ae
A
2957 }
2958}
2959
2960
1c79356b
A
2961/*
2962 * vm_page_wire:
2963 *
2964 * Mark this page as wired down by yet
2965 * another map, removing it from paging queues
2966 * as necessary.
2967 *
2968 * The page's object and the page queues must be locked.
2969 */
3e170ce0
A
2970
2971
1c79356b
A
2972void
2973vm_page_wire(
3e170ce0
A
2974 register vm_page_t mem,
2975 vm_tag_t tag,
2976 boolean_t check_memorystatus)
1c79356b
A
2977{
2978
91447636 2979// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
1c79356b
A
2980
2981 VM_PAGE_CHECK(mem);
b0d623f7
A
2982 if (mem->object) {
2983 vm_object_lock_assert_exclusive(mem->object);
2984 } else {
2985 /*
2986 * In theory, the page should be in an object before it
2987 * gets wired, since we need to hold the object lock
2988 * to update some fields in the page structure.
2989 * However, some code (i386 pmap, for example) might want
2990 * to wire a page before it gets inserted into an object.
2991 * That's somewhat OK, as long as nobody else can get to
2992 * that page and update it at the same time.
2993 */
2994 }
91447636 2995#if DEBUG
b0d623f7 2996 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2997#endif
b0d623f7 2998 if ( !VM_PAGE_WIRED(mem)) {
316670eb
A
2999
3000 if (mem->pageout_queue) {
3001 mem->pageout = FALSE;
3002 vm_pageout_throttle_up(mem);
3003 }
3e170ce0 3004 vm_page_queues_remove(mem);
b0d623f7
A
3005
3006 if (mem->object) {
3e170ce0
A
3007
3008 if (!mem->private && !mem->fictitious)
3009 {
3010 if (!mem->object->wired_page_count)
3011 {
3012 assert(VM_KERN_MEMORY_NONE != tag);
3013 mem->object->wire_tag = tag;
3014 VM_OBJECT_WIRED(mem->object);
3015 }
3016 }
b0d623f7 3017 mem->object->wired_page_count++;
3e170ce0 3018
b0d623f7
A
3019 assert(mem->object->resident_page_count >=
3020 mem->object->wired_page_count);
3021 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
3022 assert(vm_page_purgeable_count > 0);
3023 OSAddAtomic(-1, &vm_page_purgeable_count);
3024 OSAddAtomic(1, &vm_page_purgeable_wired_count);
3025 }
fe8ab488
A
3026 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
3027 mem->object->purgable == VM_PURGABLE_EMPTY) &&
3028 mem->object->vo_purgeable_owner != TASK_NULL) {
3029 task_t owner;
3030
3031 owner = mem->object->vo_purgeable_owner;
3032 /* less volatile bytes */
3033 ledger_debit(owner->ledger,
3034 task_ledgers.purgeable_volatile,
3035 PAGE_SIZE);
3036 /* more not-quite-volatile bytes */
3037 ledger_credit(owner->ledger,
3038 task_ledgers.purgeable_nonvolatile,
3039 PAGE_SIZE);
3040 /* more footprint */
3041 ledger_credit(owner->ledger,
3042 task_ledgers.phys_footprint,
3043 PAGE_SIZE);
3044 }
b0d623f7
A
3045 if (mem->object->all_reusable) {
3046 /*
3047 * Wired pages are not counted as "re-usable"
3048 * in "all_reusable" VM objects, so nothing
3049 * to do here.
3050 */
3051 } else if (mem->reusable) {
3052 /*
3053 * This page is not "re-usable" when it's
3054 * wired, so adjust its state and the
3055 * accounting.
3056 */
3057 vm_object_reuse_pages(mem->object,
3058 mem->offset,
3059 mem->offset+PAGE_SIZE_64,
3060 FALSE);
3061 }
3062 }
3063 assert(!mem->reusable);
3064
1c79356b
A
3065 if (!mem->private && !mem->fictitious && !mem->gobbled)
3066 vm_page_wire_count++;
3067 if (mem->gobbled)
3068 vm_page_gobble_count--;
3069 mem->gobbled = FALSE;
593a1d5f 3070
3e170ce0
A
3071 if (check_memorystatus == TRUE) {
3072 VM_CHECK_MEMORYSTATUS;
3073 }
91447636
A
3074 /*
3075 * ENCRYPTED SWAP:
3076 * The page could be encrypted, but
3077 * We don't have to decrypt it here
3078 * because we don't guarantee that the
3079 * data is actually valid at this point.
3080 * The page will get decrypted in
3081 * vm_fault_wire() if needed.
3082 */
1c79356b
A
3083 }
3084 assert(!mem->gobbled);
3085 mem->wire_count++;
b0d623f7 3086 VM_PAGE_CHECK(mem);
1c79356b
A
3087}
3088
1c79356b
A
3089/*
3090 * vm_page_unwire:
3091 *
3092 * Release one wiring of this page, potentially
3093 * enabling it to be paged again.
3094 *
3095 * The page's object and the page queues must be locked.
3096 */
3097void
3098vm_page_unwire(
0b4c1975
A
3099 vm_page_t mem,
3100 boolean_t queueit)
1c79356b
A
3101{
3102
91447636 3103// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
1c79356b
A
3104
3105 VM_PAGE_CHECK(mem);
b0d623f7 3106 assert(VM_PAGE_WIRED(mem));
4bd07ac2 3107 assert(!mem->gobbled);
b0d623f7 3108 assert(mem->object != VM_OBJECT_NULL);
91447636 3109#if DEBUG
b0d623f7
A
3110 vm_object_lock_assert_exclusive(mem->object);
3111 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 3112#endif
1c79356b 3113 if (--mem->wire_count == 0) {
4bd07ac2
A
3114 if (!mem->private && !mem->fictitious) {
3115 vm_page_wire_count--;
3116 }
b0d623f7
A
3117 assert(mem->object->wired_page_count > 0);
3118 mem->object->wired_page_count--;
3e170ce0
A
3119 if (!mem->object->wired_page_count) {
3120 VM_OBJECT_UNWIRED(mem->object);
3121 }
b0d623f7
A
3122 assert(mem->object->resident_page_count >=
3123 mem->object->wired_page_count);
3124 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
3125 OSAddAtomic(+1, &vm_page_purgeable_count);
3126 assert(vm_page_purgeable_wired_count > 0);
3127 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3128 }
fe8ab488
A
3129 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
3130 mem->object->purgable == VM_PURGABLE_EMPTY) &&
3131 mem->object->vo_purgeable_owner != TASK_NULL) {
3132 task_t owner;
3133
3134 owner = mem->object->vo_purgeable_owner;
3135 /* more volatile bytes */
3136 ledger_credit(owner->ledger,
3137 task_ledgers.purgeable_volatile,
3138 PAGE_SIZE);
3139 /* less not-quite-volatile bytes */
3140 ledger_debit(owner->ledger,
3141 task_ledgers.purgeable_nonvolatile,
3142 PAGE_SIZE);
3143 /* less footprint */
3144 ledger_debit(owner->ledger,
3145 task_ledgers.phys_footprint,
3146 PAGE_SIZE);
3147 }
91447636
A
3148 assert(mem->object != kernel_object);
3149 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
0b4c1975
A
3150
3151 if (queueit == TRUE) {
3152 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
3153 vm_page_deactivate(mem);
3154 } else {
3155 vm_page_activate(mem);
3156 }
2d21ac55 3157 }
593a1d5f 3158
6d2010ae
A
3159 VM_CHECK_MEMORYSTATUS;
3160
1c79356b 3161 }
b0d623f7 3162 VM_PAGE_CHECK(mem);
1c79356b
A
3163}
3164
3165/*
3166 * vm_page_deactivate:
3167 *
3168 * Returns the given page to the inactive list,
3169 * indicating that no physical maps have access
3170 * to this page. [Used by the physical mapping system.]
3171 *
3172 * The page queues must be locked.
3173 */
3174void
3175vm_page_deactivate(
b0d623f7
A
3176 vm_page_t m)
3177{
3178 vm_page_deactivate_internal(m, TRUE);
3179}
3180
3181
3182void
3183vm_page_deactivate_internal(
3184 vm_page_t m,
3185 boolean_t clear_hw_reference)
1c79356b 3186{
2d21ac55 3187
1c79356b 3188 VM_PAGE_CHECK(m);
91447636 3189 assert(m->object != kernel_object);
2d21ac55 3190 assert(m->phys_page != vm_page_guard_addr);
1c79356b 3191
55e303ae 3192// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
91447636 3193#if DEBUG
b0d623f7 3194 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 3195#endif
1c79356b
A
3196 /*
3197 * This page is no longer very interesting. If it was
3198 * interesting (active or inactive/referenced), then we
3199 * clear the reference bit and (re)enter it in the
3200 * inactive queue. Note wired pages should not have
3201 * their reference bit cleared.
3202 */
6d2010ae 3203 assert ( !(m->absent && !m->unusual));
0b4c1975 3204
1c79356b 3205 if (m->gobbled) { /* can this happen? */
b0d623f7 3206 assert( !VM_PAGE_WIRED(m));
2d21ac55 3207
1c79356b
A
3208 if (!m->private && !m->fictitious)
3209 vm_page_wire_count--;
3210 vm_page_gobble_count--;
3211 m->gobbled = FALSE;
3212 }
316670eb
A
3213 /*
3214 * if this page is currently on the pageout queue, we can't do the
3e170ce0 3215 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
3216 * and we can't remove it manually since we would need the object lock
3217 * (which is not required here) to decrement the activity_in_progress
3218 * reference which is held on the object while the page is in the pageout queue...
3219 * just let the normal laundry processing proceed
3220 */
fe8ab488 3221 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
1c79356b 3222 return;
2d21ac55 3223
6d2010ae 3224 if (!m->absent && clear_hw_reference == TRUE)
2d21ac55
A
3225 pmap_clear_reference(m->phys_page);
3226
3227 m->reference = FALSE;
2d21ac55
A
3228 m->no_cache = FALSE;
3229
3230 if (!m->inactive) {
3e170ce0 3231 vm_page_queues_remove(m);
0b4e3aa0 3232
6d2010ae 3233 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
d1ecb069
A
3234 m->dirty && m->object->internal &&
3235 (m->object->purgable == VM_PURGABLE_DENY ||
3236 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3237 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3e170ce0 3238 vm_page_check_pageable_safe(m);
2d21ac55
A
3239 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3240 m->throttled = TRUE;
3241 vm_page_throttled_count++;
9bccf70c 3242 } else {
6d2010ae 3243 if (m->object->named && m->object->ref_count == 1) {
2d21ac55 3244 vm_page_speculate(m, FALSE);
b0d623f7 3245#if DEVELOPMENT || DEBUG
2d21ac55 3246 vm_page_speculative_recreated++;
b0d623f7 3247#endif
2d21ac55 3248 } else {
3e170ce0 3249 vm_page_enqueue_inactive(m, FALSE);
2d21ac55 3250 }
9bccf70c 3251 }
1c79356b
A
3252 }
3253}
3254
316670eb
A
3255/*
3256 * vm_page_enqueue_cleaned
3257 *
3258 * Put the page on the cleaned queue, mark it cleaned, etc.
3259 * Being on the cleaned queue (and having m->clean_queue set)
3260 * does ** NOT ** guarantee that the page is clean!
3261 *
3262 * Call with the queues lock held.
3263 */
3264
3265void vm_page_enqueue_cleaned(vm_page_t m)
3266{
3267 assert(m->phys_page != vm_page_guard_addr);
3268#if DEBUG
3269 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3270#endif
3271 assert( !(m->absent && !m->unusual));
3272
3273 if (m->gobbled) {
3274 assert( !VM_PAGE_WIRED(m));
3275 if (!m->private && !m->fictitious)
3276 vm_page_wire_count--;
3277 vm_page_gobble_count--;
3278 m->gobbled = FALSE;
3279 }
3280 /*
3281 * if this page is currently on the pageout queue, we can't do the
3e170ce0 3282 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
3283 * and we can't remove it manually since we would need the object lock
3284 * (which is not required here) to decrement the activity_in_progress
3285 * reference which is held on the object while the page is in the pageout queue...
3286 * just let the normal laundry processing proceed
3287 */
fe8ab488 3288 if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
316670eb
A
3289 return;
3290
3e170ce0 3291 vm_page_queues_remove(m);
316670eb 3292
3e170ce0 3293 vm_page_check_pageable_safe(m);
316670eb
A
3294 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
3295 m->clean_queue = TRUE;
3296 vm_page_cleaned_count++;
3297
3298 m->inactive = TRUE;
3299 vm_page_inactive_count++;
39236c6e
A
3300 if (m->object->internal) {
3301 vm_page_pageable_internal_count++;
3302 } else {
3303 vm_page_pageable_external_count++;
3304 }
316670eb
A
3305
3306 vm_pageout_enqueued_cleaned++;
3307}
3308
1c79356b
A
3309/*
3310 * vm_page_activate:
3311 *
3312 * Put the specified page on the active list (if appropriate).
3313 *
3314 * The page queues must be locked.
3315 */
3316
3317void
3318vm_page_activate(
3319 register vm_page_t m)
3320{
3321 VM_PAGE_CHECK(m);
2d21ac55 3322#ifdef FIXME_4778297
91447636 3323 assert(m->object != kernel_object);
2d21ac55
A
3324#endif
3325 assert(m->phys_page != vm_page_guard_addr);
91447636 3326#if DEBUG
b0d623f7 3327 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 3328#endif
6d2010ae 3329 assert( !(m->absent && !m->unusual));
0b4c1975 3330
1c79356b 3331 if (m->gobbled) {
b0d623f7 3332 assert( !VM_PAGE_WIRED(m));
1c79356b
A
3333 if (!m->private && !m->fictitious)
3334 vm_page_wire_count--;
3335 vm_page_gobble_count--;
3336 m->gobbled = FALSE;
3337 }
316670eb
A
3338 /*
3339 * if this page is currently on the pageout queue, we can't do the
3e170ce0 3340 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
3341 * and we can't remove it manually since we would need the object lock
3342 * (which is not required here) to decrement the activity_in_progress
3343 * reference which is held on the object while the page is in the pageout queue...
3344 * just let the normal laundry processing proceed
3345 */
fe8ab488 3346 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
1c79356b
A
3347 return;
3348
2d21ac55
A
3349#if DEBUG
3350 if (m->active)
3351 panic("vm_page_activate: already active");
3352#endif
3353
3354 if (m->speculative) {
3355 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
3356 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
3357 }
316670eb 3358
3e170ce0 3359 vm_page_queues_remove(m);
2d21ac55 3360
b0d623f7 3361 if ( !VM_PAGE_WIRED(m)) {
3e170ce0 3362 vm_page_check_pageable_safe(m);
6d2010ae
A
3363 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3364 m->dirty && m->object->internal &&
d1ecb069
A
3365 (m->object->purgable == VM_PURGABLE_DENY ||
3366 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3367 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2d21ac55
A
3368 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3369 m->throttled = TRUE;
3370 vm_page_throttled_count++;
9bccf70c 3371 } else {
2d21ac55
A
3372 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3373 m->active = TRUE;
6d2010ae 3374 vm_page_active_count++;
39236c6e
A
3375 if (m->object->internal) {
3376 vm_page_pageable_internal_count++;
3377 } else {
3378 vm_page_pageable_external_count++;
3379 }
9bccf70c 3380 }
2d21ac55
A
3381 m->reference = TRUE;
3382 m->no_cache = FALSE;
1c79356b 3383 }
b0d623f7 3384 VM_PAGE_CHECK(m);
2d21ac55
A
3385}
3386
3387
3388/*
3389 * vm_page_speculate:
3390 *
3391 * Put the specified page on the speculative list (if appropriate).
3392 *
3393 * The page queues must be locked.
3394 */
3395void
3396vm_page_speculate(
3397 vm_page_t m,
3398 boolean_t new)
3399{
3400 struct vm_speculative_age_q *aq;
3401
3402 VM_PAGE_CHECK(m);
3e170ce0
A
3403 vm_page_check_pageable_safe(m);
3404
2d21ac55 3405 assert(m->phys_page != vm_page_guard_addr);
91447636 3406#if DEBUG
b0d623f7 3407 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 3408#endif
6d2010ae 3409 assert( !(m->absent && !m->unusual));
b0d623f7 3410
316670eb
A
3411 /*
3412 * if this page is currently on the pageout queue, we can't do the
3e170ce0 3413 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
3414 * and we can't remove it manually since we would need the object lock
3415 * (which is not required here) to decrement the activity_in_progress
3416 * reference which is held on the object while the page is in the pageout queue...
3417 * just let the normal laundry processing proceed
3418 */
fe8ab488 3419 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
6d2010ae 3420 return;
0b4c1975 3421
3e170ce0 3422 vm_page_queues_remove(m);
b0d623f7
A
3423
3424 if ( !VM_PAGE_WIRED(m)) {
2d21ac55 3425 mach_timespec_t ts;
b0d623f7
A
3426 clock_sec_t sec;
3427 clock_nsec_t nsec;
2d21ac55 3428
b0d623f7
A
3429 clock_get_system_nanotime(&sec, &nsec);
3430 ts.tv_sec = (unsigned int) sec;
3431 ts.tv_nsec = nsec;
2d21ac55
A
3432
3433 if (vm_page_speculative_count == 0) {
3434
3435 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3436 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3437
3438 aq = &vm_page_queue_speculative[speculative_age_index];
3439
3440 /*
3441 * set the timer to begin a new group
3442 */
6d2010ae
A
3443 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3444 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2d21ac55
A
3445
3446 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3447 } else {
3448 aq = &vm_page_queue_speculative[speculative_age_index];
3449
3450 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3451
3452 speculative_age_index++;
3453
3454 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3455 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3456 if (speculative_age_index == speculative_steal_index) {
3457 speculative_steal_index = speculative_age_index + 1;
3458
3459 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3460 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3461 }
3462 aq = &vm_page_queue_speculative[speculative_age_index];
3463
3464 if (!queue_empty(&aq->age_q))
3465 vm_page_speculate_ageit(aq);
3466
6d2010ae
A
3467 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3468 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2d21ac55
A
3469
3470 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3471 }
3472 }
3473 enqueue_tail(&aq->age_q, &m->pageq);
3474 m->speculative = TRUE;
3475 vm_page_speculative_count++;
39236c6e
A
3476 if (m->object->internal) {
3477 vm_page_pageable_internal_count++;
3478 } else {
3479 vm_page_pageable_external_count++;
3480 }
2d21ac55
A
3481
3482 if (new == TRUE) {
6d2010ae
A
3483 vm_object_lock_assert_exclusive(m->object);
3484
2d21ac55 3485 m->object->pages_created++;
b0d623f7 3486#if DEVELOPMENT || DEBUG
2d21ac55 3487 vm_page_speculative_created++;
b0d623f7 3488#endif
2d21ac55
A
3489 }
3490 }
b0d623f7 3491 VM_PAGE_CHECK(m);
2d21ac55
A
3492}
3493
3494
3495/*
3496 * move pages from the specified aging bin to
3497 * the speculative bin that pageout_scan claims from
3498 *
3499 * The page queues must be locked.
3500 */
3501void
3502vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3503{
3504 struct vm_speculative_age_q *sq;
3505 vm_page_t t;
3506
3507 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3508
3509 if (queue_empty(&sq->age_q)) {
3510 sq->age_q.next = aq->age_q.next;
3511 sq->age_q.prev = aq->age_q.prev;
3512
3513 t = (vm_page_t)sq->age_q.next;
3514 t->pageq.prev = &sq->age_q;
3515
3516 t = (vm_page_t)sq->age_q.prev;
3517 t->pageq.next = &sq->age_q;
3518 } else {
3519 t = (vm_page_t)sq->age_q.prev;
3520 t->pageq.next = aq->age_q.next;
3521
3522 t = (vm_page_t)aq->age_q.next;
3523 t->pageq.prev = sq->age_q.prev;
3524
3525 t = (vm_page_t)aq->age_q.prev;
3526 t->pageq.next = &sq->age_q;
3527
3528 sq->age_q.prev = aq->age_q.prev;
1c79356b 3529 }
2d21ac55
A
3530 queue_init(&aq->age_q);
3531}
3532
3533
3534void
3535vm_page_lru(
3536 vm_page_t m)
3537{
3538 VM_PAGE_CHECK(m);
3539 assert(m->object != kernel_object);
3540 assert(m->phys_page != vm_page_guard_addr);
3541
3542#if DEBUG
b0d623f7 3543 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 3544#endif
316670eb
A
3545 /*
3546 * if this page is currently on the pageout queue, we can't do the
3e170ce0 3547 * vm_page_queues_remove (which doesn't handle the pageout queue case)
316670eb
A
3548 * and we can't remove it manually since we would need the object lock
3549 * (which is not required here) to decrement the activity_in_progress
3550 * reference which is held on the object while the page is in the pageout queue...
3551 * just let the normal laundry processing proceed
3552 */
fe8ab488 3553 if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
2d21ac55
A
3554 return;
3555
3556 m->no_cache = FALSE;
3557
3e170ce0 3558 vm_page_queues_remove(m);
2d21ac55 3559
3e170ce0 3560 vm_page_enqueue_inactive(m, FALSE);
1c79356b
A
3561}
3562
2d21ac55 3563
b0d623f7
A
3564void
3565vm_page_reactivate_all_throttled(void)
3566{
3567 vm_page_t first_throttled, last_throttled;
3568 vm_page_t first_active;
3569 vm_page_t m;
3570 int extra_active_count;
39236c6e 3571 int extra_internal_count, extra_external_count;
b0d623f7 3572
6d2010ae
A
3573 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3574 return;
3575
b0d623f7 3576 extra_active_count = 0;
39236c6e
A
3577 extra_internal_count = 0;
3578 extra_external_count = 0;
b0d623f7
A
3579 vm_page_lock_queues();
3580 if (! queue_empty(&vm_page_queue_throttled)) {
3581 /*
3582 * Switch "throttled" pages to "active".
3583 */
3584 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3585 VM_PAGE_CHECK(m);
3586 assert(m->throttled);
3587 assert(!m->active);
3588 assert(!m->inactive);
3589 assert(!m->speculative);
3590 assert(!VM_PAGE_WIRED(m));
6d2010ae
A
3591
3592 extra_active_count++;
39236c6e
A
3593 if (m->object->internal) {
3594 extra_internal_count++;
3595 } else {
3596 extra_external_count++;
3597 }
6d2010ae 3598
b0d623f7
A
3599 m->throttled = FALSE;
3600 m->active = TRUE;
3601 VM_PAGE_CHECK(m);
3602 }
3603
3604 /*
3605 * Transfer the entire throttled queue to a regular LRU page queues.
3606 * We insert it at the head of the active queue, so that these pages
3607 * get re-evaluated by the LRU algorithm first, since they've been
3608 * completely out of it until now.
3609 */
3610 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3611 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3612 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3613 if (queue_empty(&vm_page_queue_active)) {
3614 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3615 } else {
3616 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3617 }
3618 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3619 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3620 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3621
3622#if DEBUG
3623 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3624#endif
3625 queue_init(&vm_page_queue_throttled);
3626 /*
3627 * Adjust the global page counts.
3628 */
3629 vm_page_active_count += extra_active_count;
39236c6e
A
3630 vm_page_pageable_internal_count += extra_internal_count;
3631 vm_page_pageable_external_count += extra_external_count;
b0d623f7
A
3632 vm_page_throttled_count = 0;
3633 }
3634 assert(vm_page_throttled_count == 0);
3635 assert(queue_empty(&vm_page_queue_throttled));
3636 vm_page_unlock_queues();
3637}
3638
3639
3640/*
3641 * move pages from the indicated local queue to the global active queue
3642 * its ok to fail if we're below the hard limit and force == FALSE
3643 * the nolocks == TRUE case is to allow this function to be run on
3644 * the hibernate path
3645 */
3646
3647void
3648vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3649{
3650 struct vpl *lq;
3651 vm_page_t first_local, last_local;
3652 vm_page_t first_active;
3653 vm_page_t m;
3654 uint32_t count = 0;
3655
3656 if (vm_page_local_q == NULL)
3657 return;
3658
3659 lq = &vm_page_local_q[lid].vpl_un.vpl;
3660
3661 if (nolocks == FALSE) {
3662 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3663 if ( !vm_page_trylockspin_queues())
3664 return;
3665 } else
3666 vm_page_lockspin_queues();
3667
3668 VPL_LOCK(&lq->vpl_lock);
3669 }
3670 if (lq->vpl_count) {
3671 /*
3672 * Switch "local" pages to "active".
3673 */
3674 assert(!queue_empty(&lq->vpl_queue));
3675
3676 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3677 VM_PAGE_CHECK(m);
3e170ce0 3678 vm_page_check_pageable_safe(m);
b0d623f7
A
3679 assert(m->local);
3680 assert(!m->active);
3681 assert(!m->inactive);
3682 assert(!m->speculative);
3683 assert(!VM_PAGE_WIRED(m));
3684 assert(!m->throttled);
3685 assert(!m->fictitious);
3686
3687 if (m->local_id != lid)
3688 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3689
3690 m->local_id = 0;
3691 m->local = FALSE;
3692 m->active = TRUE;
3693 VM_PAGE_CHECK(m);
3694
3695 count++;
3696 }
3697 if (count != lq->vpl_count)
3698 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3699
3700 /*
3701 * Transfer the entire local queue to a regular LRU page queues.
3702 */
3703 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3704 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3705 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3706
3707 if (queue_empty(&vm_page_queue_active)) {
3708 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3709 } else {
3710 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3711 }
3712 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3713 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3714 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3715
3716 queue_init(&lq->vpl_queue);
3717 /*
3718 * Adjust the global page counts.
3719 */
3720 vm_page_active_count += lq->vpl_count;
39236c6e
A
3721 vm_page_pageable_internal_count += lq->vpl_internal_count;
3722 vm_page_pageable_external_count += lq->vpl_external_count;
b0d623f7 3723 lq->vpl_count = 0;
39236c6e
A
3724 lq->vpl_internal_count = 0;
3725 lq->vpl_external_count = 0;
b0d623f7
A
3726 }
3727 assert(queue_empty(&lq->vpl_queue));
3728
3729 if (nolocks == FALSE) {
3730 VPL_UNLOCK(&lq->vpl_lock);
3731 vm_page_unlock_queues();
3732 }
3733}
3734
1c79356b
A
3735/*
3736 * vm_page_part_zero_fill:
3737 *
3738 * Zero-fill a part of the page.
3739 */
39236c6e 3740#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
1c79356b
A
3741void
3742vm_page_part_zero_fill(
3743 vm_page_t m,
3744 vm_offset_t m_pa,
3745 vm_size_t len)
3746{
1c79356b 3747
316670eb
A
3748#if 0
3749 /*
3750 * we don't hold the page queue lock
3751 * so this check isn't safe to make
3752 */
1c79356b 3753 VM_PAGE_CHECK(m);
316670eb
A
3754#endif
3755
1c79356b 3756#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
55e303ae 3757 pmap_zero_part_page(m->phys_page, m_pa, len);
1c79356b 3758#else
39236c6e 3759 vm_page_t tmp;
1c79356b
A
3760 while (1) {
3761 tmp = vm_page_grab();
3762 if (tmp == VM_PAGE_NULL) {
3763 vm_page_wait(THREAD_UNINT);
3764 continue;
3765 }
3766 break;
3767 }
3768 vm_page_zero_fill(tmp);
3769 if(m_pa != 0) {
3770 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3771 }
3772 if((m_pa + len) < PAGE_SIZE) {
3773 vm_page_part_copy(m, m_pa + len, tmp,
3774 m_pa + len, PAGE_SIZE - (m_pa + len));
3775 }
3776 vm_page_copy(tmp,m);
b0d623f7 3777 VM_PAGE_FREE(tmp);
1c79356b
A
3778#endif
3779
3780}
3781
3782/*
3783 * vm_page_zero_fill:
3784 *
3785 * Zero-fill the specified page.
3786 */
3787void
3788vm_page_zero_fill(
3789 vm_page_t m)
3790{
3791 XPR(XPR_VM_PAGE,
3792 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7 3793 m->object, m->offset, m, 0,0);
316670eb
A
3794#if 0
3795 /*
3796 * we don't hold the page queue lock
3797 * so this check isn't safe to make
3798 */
1c79356b 3799 VM_PAGE_CHECK(m);
316670eb 3800#endif
1c79356b 3801
55e303ae
A
3802// dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3803 pmap_zero_page(m->phys_page);
1c79356b
A
3804}
3805
3806/*
3807 * vm_page_part_copy:
3808 *
3809 * copy part of one page to another
3810 */
3811
3812void
3813vm_page_part_copy(
3814 vm_page_t src_m,
3815 vm_offset_t src_pa,
3816 vm_page_t dst_m,
3817 vm_offset_t dst_pa,
3818 vm_size_t len)
3819{
316670eb
A
3820#if 0
3821 /*
3822 * we don't hold the page queue lock
3823 * so this check isn't safe to make
3824 */
1c79356b
A
3825 VM_PAGE_CHECK(src_m);
3826 VM_PAGE_CHECK(dst_m);
316670eb 3827#endif
55e303ae
A
3828 pmap_copy_part_page(src_m->phys_page, src_pa,
3829 dst_m->phys_page, dst_pa, len);
1c79356b
A
3830}
3831
3832/*
3833 * vm_page_copy:
3834 *
3835 * Copy one page to another
91447636
A
3836 *
3837 * ENCRYPTED SWAP:
3838 * The source page should not be encrypted. The caller should
3839 * make sure the page is decrypted first, if necessary.
1c79356b
A
3840 */
3841
2d21ac55
A
3842int vm_page_copy_cs_validations = 0;
3843int vm_page_copy_cs_tainted = 0;
3844
1c79356b
A
3845void
3846vm_page_copy(
3847 vm_page_t src_m,
3848 vm_page_t dest_m)
3849{
3850 XPR(XPR_VM_PAGE,
3851 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
b0d623f7
A
3852 src_m->object, src_m->offset,
3853 dest_m->object, dest_m->offset,
1c79356b 3854 0);
316670eb
A
3855#if 0
3856 /*
3857 * we don't hold the page queue lock
3858 * so this check isn't safe to make
3859 */
1c79356b
A
3860 VM_PAGE_CHECK(src_m);
3861 VM_PAGE_CHECK(dest_m);
316670eb
A
3862#endif
3863 vm_object_lock_assert_held(src_m->object);
1c79356b 3864
91447636
A
3865 /*
3866 * ENCRYPTED SWAP:
3867 * The source page should not be encrypted at this point.
3868 * The destination page will therefore not contain encrypted
3869 * data after the copy.
3870 */
3871 if (src_m->encrypted) {
3872 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3873 }
3874 dest_m->encrypted = FALSE;
3875
2d21ac55 3876 if (src_m->object != VM_OBJECT_NULL &&
4a3eedf9 3877 src_m->object->code_signed) {
2d21ac55 3878 /*
4a3eedf9 3879 * We're copying a page from a code-signed object.
2d21ac55
A
3880 * Whoever ends up mapping the copy page might care about
3881 * the original page's integrity, so let's validate the
3882 * source page now.
3883 */
3884 vm_page_copy_cs_validations++;
3885 vm_page_validate_cs(src_m);
3886 }
6d2010ae
A
3887
3888 if (vm_page_is_slideable(src_m)) {
3889 boolean_t was_busy = src_m->busy;
3890 src_m->busy = TRUE;
3891 (void) vm_page_slide(src_m, 0);
3892 assert(src_m->busy);
316670eb 3893 if (!was_busy) {
6d2010ae
A
3894 PAGE_WAKEUP_DONE(src_m);
3895 }
3896 }
3897
2d21ac55 3898 /*
b0d623f7
A
3899 * Propagate the cs_tainted bit to the copy page. Do not propagate
3900 * the cs_validated bit.
2d21ac55 3901 */
2d21ac55
A
3902 dest_m->cs_tainted = src_m->cs_tainted;
3903 if (dest_m->cs_tainted) {
2d21ac55
A
3904 vm_page_copy_cs_tainted++;
3905 }
6d2010ae
A
3906 dest_m->slid = src_m->slid;
3907 dest_m->error = src_m->error; /* sliding src_m might have failed... */
55e303ae 3908 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
1c79356b
A
3909}
3910
2d21ac55 3911#if MACH_ASSERT
b0d623f7
A
3912static void
3913_vm_page_print(
3914 vm_page_t p)
3915{
3916 printf("vm_page %p: \n", p);
3917 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3918 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
fe8ab488 3919 printf(" next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
b0d623f7
A
3920 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3921 printf(" wire_count=%u\n", p->wire_count);
3922
3923 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3924 (p->local ? "" : "!"),
3925 (p->inactive ? "" : "!"),
3926 (p->active ? "" : "!"),
3927 (p->pageout_queue ? "" : "!"),
3928 (p->speculative ? "" : "!"),
3929 (p->laundry ? "" : "!"));
3930 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3931 (p->free ? "" : "!"),
3932 (p->reference ? "" : "!"),
3933 (p->gobbled ? "" : "!"),
3934 (p->private ? "" : "!"),
3935 (p->throttled ? "" : "!"));
3936 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3937 (p->busy ? "" : "!"),
3938 (p->wanted ? "" : "!"),
3939 (p->tabled ? "" : "!"),
3940 (p->fictitious ? "" : "!"),
3941 (p->pmapped ? "" : "!"),
3942 (p->wpmapped ? "" : "!"));
3943 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3944 (p->pageout ? "" : "!"),
3945 (p->absent ? "" : "!"),
3946 (p->error ? "" : "!"),
3947 (p->dirty ? "" : "!"),
3948 (p->cleaning ? "" : "!"),
3949 (p->precious ? "" : "!"),
3950 (p->clustered ? "" : "!"));
3951 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3952 (p->overwriting ? "" : "!"),
3953 (p->restart ? "" : "!"),
3954 (p->unusual ? "" : "!"),
3955 (p->encrypted ? "" : "!"),
3956 (p->encrypted_cleaning ? "" : "!"));
c18c124e 3957 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
b0d623f7
A
3958 (p->cs_validated ? "" : "!"),
3959 (p->cs_tainted ? "" : "!"),
c18c124e 3960 (p->cs_nx ? "" : "!"),
b0d623f7 3961 (p->no_cache ? "" : "!"));
b0d623f7
A
3962
3963 printf("phys_page=0x%x\n", p->phys_page);
3964}
3965
1c79356b
A
3966/*
3967 * Check that the list of pages is ordered by
3968 * ascending physical address and has no holes.
3969 */
2d21ac55 3970static int
1c79356b
A
3971vm_page_verify_contiguous(
3972 vm_page_t pages,
3973 unsigned int npages)
3974{
3975 register vm_page_t m;
3976 unsigned int page_count;
91447636 3977 vm_offset_t prev_addr;
1c79356b 3978
55e303ae 3979 prev_addr = pages->phys_page;
1c79356b
A
3980 page_count = 1;
3981 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
55e303ae 3982 if (m->phys_page != prev_addr + 1) {
b0d623f7
A
3983 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3984 m, (long)prev_addr, m->phys_page);
6d2010ae 3985 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
1c79356b
A
3986 panic("vm_page_verify_contiguous: not contiguous!");
3987 }
55e303ae 3988 prev_addr = m->phys_page;
1c79356b
A
3989 ++page_count;
3990 }
3991 if (page_count != npages) {
2d21ac55 3992 printf("pages %p actual count 0x%x but requested 0x%x\n",
1c79356b
A
3993 pages, page_count, npages);
3994 panic("vm_page_verify_contiguous: count error");
3995 }
3996 return 1;
3997}
1c79356b
A
3998
3999
2d21ac55
A
4000/*
4001 * Check the free lists for proper length etc.
4002 */
fe8ab488 4003static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
b0d623f7
A
4004static unsigned int
4005vm_page_verify_free_list(
d1ecb069 4006 queue_head_t *vm_page_queue,
b0d623f7
A
4007 unsigned int color,
4008 vm_page_t look_for_page,
4009 boolean_t expect_page)
4010{
4011 unsigned int npages;
4012 vm_page_t m;
4013 vm_page_t prev_m;
4014 boolean_t found_page;
4015
fe8ab488
A
4016 if (! vm_page_verify_this_free_list_enabled)
4017 return 0;
4018
b0d623f7
A
4019 found_page = FALSE;
4020 npages = 0;
d1ecb069
A
4021 prev_m = (vm_page_t) vm_page_queue;
4022 queue_iterate(vm_page_queue,
b0d623f7
A
4023 m,
4024 vm_page_t,
4025 pageq) {
6d2010ae 4026
b0d623f7
A
4027 if (m == look_for_page) {
4028 found_page = TRUE;
4029 }
4030 if ((vm_page_t) m->pageq.prev != prev_m)
4031 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
4032 color, npages, m, m->pageq.prev, prev_m);
b0d623f7
A
4033 if ( ! m->busy )
4034 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
4035 color, npages, m);
6d2010ae
A
4036 if (color != (unsigned int) -1) {
4037 if ((m->phys_page & vm_color_mask) != color)
4038 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
4039 color, npages, m, m->phys_page & vm_color_mask, color);
4040 if ( ! m->free )
4041 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
4042 color, npages, m);
4043 }
b0d623f7
A
4044 ++npages;
4045 prev_m = m;
4046 }
4047 if (look_for_page != VM_PAGE_NULL) {
4048 unsigned int other_color;
4049
4050 if (expect_page && !found_page) {
4051 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
4052 color, npages, look_for_page, look_for_page->phys_page);
4053 _vm_page_print(look_for_page);
4054 for (other_color = 0;
4055 other_color < vm_colors;
4056 other_color++) {
4057 if (other_color == color)
4058 continue;
d1ecb069 4059 vm_page_verify_free_list(&vm_page_queue_free[other_color],
6d2010ae 4060 other_color, look_for_page, FALSE);
b0d623f7 4061 }
6d2010ae 4062 if (color == (unsigned int) -1) {
d1ecb069
A
4063 vm_page_verify_free_list(&vm_lopage_queue_free,
4064 (unsigned int) -1, look_for_page, FALSE);
4065 }
b0d623f7
A
4066 panic("vm_page_verify_free_list(color=%u)\n", color);
4067 }
4068 if (!expect_page && found_page) {
4069 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
4070 color, npages, look_for_page, look_for_page->phys_page);
4071 }
4072 }
4073 return npages;
4074}
4075
fe8ab488 4076static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
2d21ac55
A
4077static void
4078vm_page_verify_free_lists( void )
4079{
d1ecb069 4080 unsigned int color, npages, nlopages;
fe8ab488 4081 boolean_t toggle = TRUE;
b0d623f7 4082
fe8ab488 4083 if (! vm_page_verify_all_free_lists_enabled)
b0d623f7
A
4084 return;
4085
2d21ac55 4086 npages = 0;
b0d623f7
A
4087
4088 lck_mtx_lock(&vm_page_queue_free_lock);
fe8ab488
A
4089
4090 if (vm_page_verify_this_free_list_enabled == TRUE) {
4091 /*
4092 * This variable has been set globally for extra checking of
4093 * each free list Q. Since we didn't set it, we don't own it
4094 * and we shouldn't toggle it.
4095 */
4096 toggle = FALSE;
4097 }
4098
4099 if (toggle == TRUE) {
4100 vm_page_verify_this_free_list_enabled = TRUE;
4101 }
2d21ac55
A
4102
4103 for( color = 0; color < vm_colors; color++ ) {
d1ecb069 4104 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
6d2010ae 4105 color, VM_PAGE_NULL, FALSE);
2d21ac55 4106 }
d1ecb069
A
4107 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
4108 (unsigned int) -1,
4109 VM_PAGE_NULL, FALSE);
4110 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
4111 panic("vm_page_verify_free_lists: "
4112 "npages %u free_count %d nlopages %u lo_free_count %u",
4113 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
6d2010ae 4114
fe8ab488
A
4115 if (toggle == TRUE) {
4116 vm_page_verify_this_free_list_enabled = FALSE;
4117 }
4118
b0d623f7 4119 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 4120}
2d21ac55 4121
b0d623f7
A
4122void
4123vm_page_queues_assert(
4124 vm_page_t mem,
4125 int val)
4126{
316670eb
A
4127#if DEBUG
4128 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4129#endif
b0d623f7
A
4130 if (mem->free + mem->active + mem->inactive + mem->speculative +
4131 mem->throttled + mem->pageout_queue > (val)) {
4132 _vm_page_print(mem);
4133 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
4134 }
4135 if (VM_PAGE_WIRED(mem)) {
4136 assert(!mem->active);
4137 assert(!mem->inactive);
4138 assert(!mem->speculative);
4139 assert(!mem->throttled);
316670eb 4140 assert(!mem->pageout_queue);
b0d623f7
A
4141 }
4142}
4143#endif /* MACH_ASSERT */
2d21ac55 4144
91447636 4145
3e170ce0
A
4146
4147
4148
4149extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4150
1c79356b 4151/*
2d21ac55 4152 * CONTIGUOUS PAGE ALLOCATION
2d21ac55
A
4153 *
4154 * Find a region large enough to contain at least n pages
1c79356b
A
4155 * of contiguous physical memory.
4156 *
2d21ac55
A
4157 * This is done by traversing the vm_page_t array in a linear fashion
4158 * we assume that the vm_page_t array has the avaiable physical pages in an
4159 * ordered, ascending list... this is currently true of all our implementations
4160 * and must remain so... there can be 'holes' in the array... we also can
4161 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
4162 * which use to happen via 'vm_page_convert'... that function was no longer
4163 * being called and was removed...
4164 *
4165 * The basic flow consists of stabilizing some of the interesting state of
4166 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
4167 * sweep at the beginning of the array looking for pages that meet our criterea
4168 * for a 'stealable' page... currently we are pretty conservative... if the page
4169 * meets this criterea and is physically contiguous to the previous page in the 'run'
4170 * we keep developing it. If we hit a page that doesn't fit, we reset our state
4171 * and start to develop a new run... if at this point we've already considered
4172 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
4173 * and mutex_pause (which will yield the processor), to keep the latency low w/r
4174 * to other threads trying to acquire free pages (or move pages from q to q),
4175 * and then continue from the spot we left off... we only make 1 pass through the
4176 * array. Once we have a 'run' that is long enough, we'll go into the loop which
4177 * which steals the pages from the queues they're currently on... pages on the free
4178 * queue can be stolen directly... pages that are on any of the other queues
4179 * must be removed from the object they are tabled on... this requires taking the
4180 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
4181 * or if the state of the page behind the vm_object lock is no longer viable, we'll
4182 * dump the pages we've currently stolen back to the free list, and pick up our
4183 * scan from the point where we aborted the 'current' run.
4184 *
4185 *
1c79356b 4186 * Requirements:
2d21ac55 4187 * - neither vm_page_queue nor vm_free_list lock can be held on entry
1c79356b 4188 *
2d21ac55 4189 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
1c79356b 4190 *
e5568f75 4191 * Algorithm:
1c79356b 4192 */
2d21ac55
A
4193
4194#define MAX_CONSIDERED_BEFORE_YIELD 1000
4195
4196
4197#define RESET_STATE_OF_RUN() \
4198 MACRO_BEGIN \
4199 prevcontaddr = -2; \
b0d623f7 4200 start_pnum = -1; \
2d21ac55
A
4201 free_considered = 0; \
4202 substitute_needed = 0; \
4203 npages = 0; \
4204 MACRO_END
4205
b0d623f7
A
4206/*
4207 * Can we steal in-use (i.e. not free) pages when searching for
4208 * physically-contiguous pages ?
4209 */
4210#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
4211
4212static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
4213#if DEBUG
4214int vm_page_find_contig_debug = 0;
4215#endif
2d21ac55 4216
1c79356b
A
4217static vm_page_t
4218vm_page_find_contiguous(
2d21ac55
A
4219 unsigned int contig_pages,
4220 ppnum_t max_pnum,
b0d623f7
A
4221 ppnum_t pnum_mask,
4222 boolean_t wire,
4223 int flags)
1c79356b 4224{
2d21ac55 4225 vm_page_t m = NULL;
e5568f75 4226 ppnum_t prevcontaddr;
b0d623f7
A
4227 ppnum_t start_pnum;
4228 unsigned int npages, considered, scanned;
4229 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
4230 unsigned int idx_last_contig_page_found = 0;
2d21ac55
A
4231 int free_considered, free_available;
4232 int substitute_needed;
3e170ce0 4233 boolean_t wrapped, zone_gc_called = FALSE;
593a1d5f 4234#if DEBUG
b0d623f7
A
4235 clock_sec_t tv_start_sec, tv_end_sec;
4236 clock_usec_t tv_start_usec, tv_end_usec;
593a1d5f 4237#endif
3e170ce0 4238
2d21ac55
A
4239 int yielded = 0;
4240 int dumped_run = 0;
4241 int stolen_pages = 0;
39236c6e 4242 int compressed_pages = 0;
3e170ce0 4243
1c79356b 4244
2d21ac55 4245 if (contig_pages == 0)
1c79356b
A
4246 return VM_PAGE_NULL;
4247
3e170ce0
A
4248full_scan_again:
4249
2d21ac55
A
4250#if MACH_ASSERT
4251 vm_page_verify_free_lists();
593a1d5f
A
4252#endif
4253#if DEBUG
2d21ac55
A
4254 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
4255#endif
39236c6e
A
4256 PAGE_REPLACEMENT_ALLOWED(TRUE);
4257
2d21ac55 4258 vm_page_lock_queues();
3e170ce0
A
4259
4260
b0d623f7 4261 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
4262
4263 RESET_STATE_OF_RUN();
1c79356b 4264
b0d623f7 4265 scanned = 0;
2d21ac55
A
4266 considered = 0;
4267 free_available = vm_page_free_count - vm_page_free_reserved;
e5568f75 4268
b0d623f7
A
4269 wrapped = FALSE;
4270
4271 if(flags & KMA_LOMEM)
4272 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
4273 else
4274 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
4275
4276 orig_last_idx = idx_last_contig_page_found;
4277 last_idx = orig_last_idx;
4278
4279 for (page_idx = last_idx, start_idx = last_idx;
2d21ac55
A
4280 npages < contig_pages && page_idx < vm_pages_count;
4281 page_idx++) {
b0d623f7
A
4282retry:
4283 if (wrapped &&
4284 npages == 0 &&
4285 page_idx >= orig_last_idx) {
4286 /*
4287 * We're back where we started and we haven't
4288 * found any suitable contiguous range. Let's
4289 * give up.
4290 */
4291 break;
4292 }
4293 scanned++;
2d21ac55 4294 m = &vm_pages[page_idx];
e5568f75 4295
b0d623f7
A
4296 assert(!m->fictitious);
4297 assert(!m->private);
4298
2d21ac55
A
4299 if (max_pnum && m->phys_page > max_pnum) {
4300 /* no more low pages... */
4301 break;
e5568f75 4302 }
6d2010ae 4303 if (!npages & ((m->phys_page & pnum_mask) != 0)) {
b0d623f7
A
4304 /*
4305 * not aligned
4306 */
4307 RESET_STATE_OF_RUN();
4308
4309 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
39236c6e
A
4310 m->encrypted_cleaning ||
4311 m->pageout_queue || m->laundry || m->wanted ||
4312 m->cleaning || m->overwriting || m->pageout) {
2d21ac55
A
4313 /*
4314 * page is in a transient state
4315 * or a state we don't want to deal
4316 * with, so don't consider it which
4317 * means starting a new run
4318 */
4319 RESET_STATE_OF_RUN();
1c79356b 4320
39236c6e 4321 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
2d21ac55
A
4322 /*
4323 * page needs to be on one of our queues
39236c6e 4324 * or it needs to belong to the compressor pool
2d21ac55
A
4325 * in order for it to be stable behind the
4326 * locks we hold at this point...
4327 * if not, don't consider it which
4328 * means starting a new run
4329 */
4330 RESET_STATE_OF_RUN();
4331
4332 } else if (!m->free && (!m->tabled || m->busy)) {
4333 /*
4334 * pages on the free list are always 'busy'
4335 * so we couldn't test for 'busy' in the check
4336 * for the transient states... pages that are
4337 * 'free' are never 'tabled', so we also couldn't
4338 * test for 'tabled'. So we check here to make
4339 * sure that a non-free page is not busy and is
4340 * tabled on an object...
4341 * if not, don't consider it which
4342 * means starting a new run
4343 */
4344 RESET_STATE_OF_RUN();
4345
4346 } else {
4347 if (m->phys_page != prevcontaddr + 1) {
b0d623f7
A
4348 if ((m->phys_page & pnum_mask) != 0) {
4349 RESET_STATE_OF_RUN();
4350 goto did_consider;
4351 } else {
4352 npages = 1;
4353 start_idx = page_idx;
4354 start_pnum = m->phys_page;
4355 }
2d21ac55
A
4356 } else {
4357 npages++;
e5568f75 4358 }
2d21ac55 4359 prevcontaddr = m->phys_page;
b0d623f7
A
4360
4361 VM_PAGE_CHECK(m);
2d21ac55
A
4362 if (m->free) {
4363 free_considered++;
b0d623f7
A
4364 } else {
4365 /*
4366 * This page is not free.
4367 * If we can't steal used pages,
4368 * we have to give up this run
4369 * and keep looking.
4370 * Otherwise, we might need to
4371 * move the contents of this page
4372 * into a substitute page.
4373 */
4374#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
39236c6e 4375 if (m->pmapped || m->dirty || m->precious) {
b0d623f7
A
4376 substitute_needed++;
4377 }
4378#else
4379 RESET_STATE_OF_RUN();
4380#endif
2d21ac55 4381 }
b0d623f7 4382
2d21ac55
A
4383 if ((free_considered + substitute_needed) > free_available) {
4384 /*
4385 * if we let this run continue
4386 * we will end up dropping the vm_page_free_count
4387 * below the reserve limit... we need to abort
4388 * this run, but we can at least re-consider this
4389 * page... thus the jump back to 'retry'
4390 */
4391 RESET_STATE_OF_RUN();
4392
4393 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4394 considered++;
4395 goto retry;
e5568f75 4396 }
2d21ac55
A
4397 /*
4398 * free_available == 0
4399 * so can't consider any free pages... if
4400 * we went to retry in this case, we'd
4401 * get stuck looking at the same page
4402 * w/o making any forward progress
4403 * we also want to take this path if we've already
4404 * reached our limit that controls the lock latency
4405 */
e5568f75 4406 }
2d21ac55 4407 }
b0d623f7 4408did_consider:
2d21ac55 4409 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
39236c6e
A
4410
4411 PAGE_REPLACEMENT_ALLOWED(FALSE);
4412
b0d623f7 4413 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 4414 vm_page_unlock_queues();
e5568f75 4415
2d21ac55
A
4416 mutex_pause(0);
4417
39236c6e
A
4418 PAGE_REPLACEMENT_ALLOWED(TRUE);
4419
2d21ac55 4420 vm_page_lock_queues();
b0d623f7 4421 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
4422
4423 RESET_STATE_OF_RUN();
1c79356b 4424 /*
2d21ac55
A
4425 * reset our free page limit since we
4426 * dropped the lock protecting the vm_page_free_queue
1c79356b 4427 */
2d21ac55
A
4428 free_available = vm_page_free_count - vm_page_free_reserved;
4429 considered = 0;
3e170ce0 4430
2d21ac55 4431 yielded++;
3e170ce0 4432
2d21ac55
A
4433 goto retry;
4434 }
4435 considered++;
4436 }
4437 m = VM_PAGE_NULL;
4438
b0d623f7
A
4439 if (npages != contig_pages) {
4440 if (!wrapped) {
4441 /*
4442 * We didn't find a contiguous range but we didn't
4443 * start from the very first page.
4444 * Start again from the very first page.
4445 */
4446 RESET_STATE_OF_RUN();
4447 if( flags & KMA_LOMEM)
4448 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4449 else
4450 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4451 last_idx = 0;
4452 page_idx = last_idx;
4453 wrapped = TRUE;
4454 goto retry;
4455 }
4456 lck_mtx_unlock(&vm_page_queue_free_lock);
4457 } else {
2d21ac55
A
4458 vm_page_t m1;
4459 vm_page_t m2;
4460 unsigned int cur_idx;
4461 unsigned int tmp_start_idx;
4462 vm_object_t locked_object = VM_OBJECT_NULL;
4463 boolean_t abort_run = FALSE;
4464
b0d623f7
A
4465 assert(page_idx - start_idx == contig_pages);
4466
2d21ac55
A
4467 tmp_start_idx = start_idx;
4468
4469 /*
4470 * first pass through to pull the free pages
4471 * off of the free queue so that in case we
4472 * need substitute pages, we won't grab any
4473 * of the free pages in the run... we'll clear
4474 * the 'free' bit in the 2nd pass, and even in
4475 * an abort_run case, we'll collect all of the
4476 * free pages in this run and return them to the free list
4477 */
4478 while (start_idx < page_idx) {
4479
4480 m1 = &vm_pages[start_idx++];
4481
b0d623f7
A
4482#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4483 assert(m1->free);
4484#endif
4485
2d21ac55 4486 if (m1->free) {
0b4c1975 4487 unsigned int color;
2d21ac55 4488
0b4c1975 4489 color = m1->phys_page & vm_color_mask;
b0d623f7 4490#if MACH_ASSERT
6d2010ae 4491 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
b0d623f7 4492#endif
0b4c1975
A
4493 queue_remove(&vm_page_queue_free[color],
4494 m1,
4495 vm_page_t,
4496 pageq);
d1ecb069
A
4497 m1->pageq.next = NULL;
4498 m1->pageq.prev = NULL;
0b4c1975 4499#if MACH_ASSERT
6d2010ae 4500 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
0b4c1975 4501#endif
b0d623f7
A
4502 /*
4503 * Clear the "free" bit so that this page
4504 * does not get considered for another
4505 * concurrent physically-contiguous allocation.
4506 */
4507 m1->free = FALSE;
4508 assert(m1->busy);
0b4c1975
A
4509
4510 vm_page_free_count--;
2d21ac55
A
4511 }
4512 }
b0d623f7
A
4513 if( flags & KMA_LOMEM)
4514 vm_page_lomem_find_contiguous_last_idx = page_idx;
4515 else
4516 vm_page_find_contiguous_last_idx = page_idx;
4517
2d21ac55
A
4518 /*
4519 * we can drop the free queue lock at this point since
4520 * we've pulled any 'free' candidates off of the list
4521 * we need it dropped so that we can do a vm_page_grab
4522 * when substituing for pmapped/dirty pages
4523 */
b0d623f7 4524 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
4525
4526 start_idx = tmp_start_idx;
4527 cur_idx = page_idx - 1;
4528
4529 while (start_idx++ < page_idx) {
4530 /*
4531 * must go through the list from back to front
4532 * so that the page list is created in the
4533 * correct order - low -> high phys addresses
4534 */
4535 m1 = &vm_pages[cur_idx--];
4536
b0d623f7 4537 assert(!m1->free);
39236c6e 4538
b0d623f7 4539 if (m1->object == VM_OBJECT_NULL) {
2d21ac55 4540 /*
b0d623f7 4541 * page has already been removed from
2d21ac55
A
4542 * the free list in the 1st pass
4543 */
b0d623f7 4544 assert(m1->offset == (vm_object_offset_t) -1);
2d21ac55
A
4545 assert(m1->busy);
4546 assert(!m1->wanted);
4547 assert(!m1->laundry);
e5568f75 4548 } else {
2d21ac55 4549 vm_object_t object;
39236c6e
A
4550 int refmod;
4551 boolean_t disconnected, reusable;
2d21ac55
A
4552
4553 if (abort_run == TRUE)
4554 continue;
4555
4556 object = m1->object;
4557
4558 if (object != locked_object) {
4559 if (locked_object) {
4560 vm_object_unlock(locked_object);
4561 locked_object = VM_OBJECT_NULL;
4562 }
4563 if (vm_object_lock_try(object))
4564 locked_object = object;
4565 }
4566 if (locked_object == VM_OBJECT_NULL ||
b0d623f7 4567 (VM_PAGE_WIRED(m1) || m1->gobbled ||
39236c6e
A
4568 m1->encrypted_cleaning ||
4569 m1->pageout_queue || m1->laundry || m1->wanted ||
4570 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
2d21ac55
A
4571
4572 if (locked_object) {
4573 vm_object_unlock(locked_object);
4574 locked_object = VM_OBJECT_NULL;
4575 }
4576 tmp_start_idx = cur_idx;
4577 abort_run = TRUE;
4578 continue;
4579 }
39236c6e
A
4580
4581 disconnected = FALSE;
4582 reusable = FALSE;
4583
4584 if ((m1->reusable ||
4585 m1->object->all_reusable) &&
4586 m1->inactive &&
4587 !m1->dirty &&
4588 !m1->reference) {
4589 /* reusable page... */
4590 refmod = pmap_disconnect(m1->phys_page);
4591 disconnected = TRUE;
4592 if (refmod == 0) {
4593 /*
4594 * ... not reused: can steal
4595 * without relocating contents.
4596 */
4597 reusable = TRUE;
4598 }
4599 }
4600
4601 if ((m1->pmapped &&
4602 ! reusable) ||
4603 m1->dirty ||
4604 m1->precious) {
2d21ac55
A
4605 vm_object_offset_t offset;
4606
4607 m2 = vm_page_grab();
4608
4609 if (m2 == VM_PAGE_NULL) {
4610 if (locked_object) {
4611 vm_object_unlock(locked_object);
4612 locked_object = VM_OBJECT_NULL;
4613 }
4614 tmp_start_idx = cur_idx;
4615 abort_run = TRUE;
4616 continue;
4617 }
39236c6e
A
4618 if (! disconnected) {
4619 if (m1->pmapped)
4620 refmod = pmap_disconnect(m1->phys_page);
4621 else
4622 refmod = 0;
4623 }
4624
4625 /* copy the page's contents */
4626 pmap_copy_page(m1->phys_page, m2->phys_page);
4627 /* copy the page's state */
4628 assert(!VM_PAGE_WIRED(m1));
4629 assert(!m1->free);
4630 assert(!m1->pageout_queue);
4631 assert(!m1->laundry);
4632 m2->reference = m1->reference;
4633 assert(!m1->gobbled);
4634 assert(!m1->private);
4635 m2->no_cache = m1->no_cache;
fe8ab488 4636 m2->xpmapped = 0;
39236c6e
A
4637 assert(!m1->busy);
4638 assert(!m1->wanted);
4639 assert(!m1->fictitious);
4640 m2->pmapped = m1->pmapped; /* should flush cache ? */
4641 m2->wpmapped = m1->wpmapped;
4642 assert(!m1->pageout);
4643 m2->absent = m1->absent;
4644 m2->error = m1->error;
4645 m2->dirty = m1->dirty;
4646 assert(!m1->cleaning);
4647 m2->precious = m1->precious;
4648 m2->clustered = m1->clustered;
4649 assert(!m1->overwriting);
4650 m2->restart = m1->restart;
4651 m2->unusual = m1->unusual;
4652 m2->encrypted = m1->encrypted;
4653 assert(!m1->encrypted_cleaning);
4654 m2->cs_validated = m1->cs_validated;
4655 m2->cs_tainted = m1->cs_tainted;
c18c124e 4656 m2->cs_nx = m1->cs_nx;
39236c6e
A
4657
4658 /*
4659 * If m1 had really been reusable,
4660 * we would have just stolen it, so
4661 * let's not propagate it's "reusable"
4662 * bit and assert that m2 is not
4663 * marked as "reusable".
4664 */
4665 // m2->reusable = m1->reusable;
4666 assert(!m2->reusable);
4667
4668 assert(!m1->lopage);
4669 m2->slid = m1->slid;
39236c6e
A
4670 m2->compressor = m1->compressor;
4671
15129b1c
A
4672 /*
4673 * page may need to be flushed if
4674 * it is marshalled into a UPL
4675 * that is going to be used by a device
4676 * that doesn't support coherency
4677 */
4678 m2->written_by_kernel = TRUE;
4679
39236c6e
A
4680 /*
4681 * make sure we clear the ref/mod state
4682 * from the pmap layer... else we risk
4683 * inheriting state from the last time
4684 * this page was used...
4685 */
4686 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2d21ac55
A
4687
4688 if (refmod & VM_MEM_REFERENCED)
4689 m2->reference = TRUE;
316670eb
A
4690 if (refmod & VM_MEM_MODIFIED) {
4691 SET_PAGE_DIRTY(m2, TRUE);
4692 }
2d21ac55
A
4693 offset = m1->offset;
4694
4695 /*
4696 * completely cleans up the state
4697 * of the page so that it is ready
4698 * to be put onto the free list, or
4699 * for this purpose it looks like it
4700 * just came off of the free list
4701 */
4702 vm_page_free_prepare(m1);
4703
4704 /*
39236c6e
A
4705 * now put the substitute page
4706 * on the object
2d21ac55 4707 */
3e170ce0 4708 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
2d21ac55 4709
39236c6e
A
4710 if (m2->compressor) {
4711 m2->pmapped = TRUE;
4712 m2->wpmapped = TRUE;
2d21ac55 4713
39236c6e
A
4714 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4715 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
3e170ce0 4716
39236c6e 4717 compressed_pages++;
3e170ce0 4718
39236c6e
A
4719 } else {
4720 if (m2->reference)
4721 vm_page_activate(m2);
4722 else
4723 vm_page_deactivate(m2);
4724 }
2d21ac55
A
4725 PAGE_WAKEUP_DONE(m2);
4726
4727 } else {
39236c6e
A
4728 assert(!m1->compressor);
4729
2d21ac55
A
4730 /*
4731 * completely cleans up the state
4732 * of the page so that it is ready
4733 * to be put onto the free list, or
4734 * for this purpose it looks like it
4735 * just came off of the free list
4736 */
4737 vm_page_free_prepare(m1);
4738 }
3e170ce0 4739
2d21ac55 4740 stolen_pages++;
3e170ce0 4741
1c79356b 4742 }
2d21ac55
A
4743 m1->pageq.next = (queue_entry_t) m;
4744 m1->pageq.prev = NULL;
4745 m = m1;
e5568f75 4746 }
2d21ac55
A
4747 if (locked_object) {
4748 vm_object_unlock(locked_object);
4749 locked_object = VM_OBJECT_NULL;
1c79356b
A
4750 }
4751
2d21ac55
A
4752 if (abort_run == TRUE) {
4753 if (m != VM_PAGE_NULL) {
b0d623f7 4754 vm_page_free_list(m, FALSE);
2d21ac55 4755 }
3e170ce0 4756
2d21ac55 4757 dumped_run++;
3e170ce0 4758
2d21ac55
A
4759 /*
4760 * want the index of the last
4761 * page in this run that was
4762 * successfully 'stolen', so back
4763 * it up 1 for the auto-decrement on use
4764 * and 1 more to bump back over this page
4765 */
4766 page_idx = tmp_start_idx + 2;
b0d623f7
A
4767 if (page_idx >= vm_pages_count) {
4768 if (wrapped)
4769 goto done_scanning;
4770 page_idx = last_idx = 0;
4771 wrapped = TRUE;
4772 }
4773 abort_run = FALSE;
4774
2d21ac55 4775 /*
b0d623f7
A
4776 * We didn't find a contiguous range but we didn't
4777 * start from the very first page.
4778 * Start again from the very first page.
2d21ac55 4779 */
b0d623f7
A
4780 RESET_STATE_OF_RUN();
4781
4782 if( flags & KMA_LOMEM)
4783 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4784 else
4785 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4786
4787 last_idx = page_idx;
2d21ac55 4788
b0d623f7
A
4789 lck_mtx_lock(&vm_page_queue_free_lock);
4790 /*
4791 * reset our free page limit since we
4792 * dropped the lock protecting the vm_page_free_queue
4793 */
4794 free_available = vm_page_free_count - vm_page_free_reserved;
2d21ac55
A
4795 goto retry;
4796 }
e5568f75 4797
e5568f75 4798 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
2d21ac55
A
4799
4800 if (wire == TRUE)
4801 m1->wire_count++;
4802 else
4803 m1->gobbled = TRUE;
e5568f75 4804 }
2d21ac55
A
4805 if (wire == FALSE)
4806 vm_page_gobble_count += npages;
4807
4808 /*
4809 * gobbled pages are also counted as wired pages
4810 */
e5568f75 4811 vm_page_wire_count += npages;
e5568f75 4812
2d21ac55
A
4813 assert(vm_page_verify_contiguous(m, npages));
4814 }
4815done_scanning:
39236c6e
A
4816 PAGE_REPLACEMENT_ALLOWED(FALSE);
4817
2d21ac55
A
4818 vm_page_unlock_queues();
4819
593a1d5f 4820#if DEBUG
2d21ac55
A
4821 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4822
4823 tv_end_sec -= tv_start_sec;
4824 if (tv_end_usec < tv_start_usec) {
4825 tv_end_sec--;
4826 tv_end_usec += 1000000;
1c79356b 4827 }
2d21ac55
A
4828 tv_end_usec -= tv_start_usec;
4829 if (tv_end_usec >= 1000000) {
4830 tv_end_sec++;
4831 tv_end_sec -= 1000000;
4832 }
b0d623f7 4833 if (vm_page_find_contig_debug) {
39236c6e
A
4834 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4835 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4836 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4837 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
b0d623f7 4838 }
e5568f75 4839
593a1d5f
A
4840#endif
4841#if MACH_ASSERT
2d21ac55
A
4842 vm_page_verify_free_lists();
4843#endif
3e170ce0
A
4844 if (m == NULL && zone_gc_called == FALSE) {
4845 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
4846 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4847 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
4848
4849 if (consider_buffer_cache_collect != NULL) {
4850 (void)(*consider_buffer_cache_collect)(1);
4851 }
4852
4853 consider_zone_gc(TRUE);
4854
4855 zone_gc_called = TRUE;
4856
4857 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
4858 goto full_scan_again;
4859 }
4860
e5568f75 4861 return m;
1c79356b
A
4862}
4863
4864/*
4865 * Allocate a list of contiguous, wired pages.
4866 */
4867kern_return_t
4868cpm_allocate(
4869 vm_size_t size,
4870 vm_page_t *list,
2d21ac55 4871 ppnum_t max_pnum,
b0d623f7
A
4872 ppnum_t pnum_mask,
4873 boolean_t wire,
4874 int flags)
1c79356b 4875{
91447636
A
4876 vm_page_t pages;
4877 unsigned int npages;
1c79356b 4878
6d2010ae 4879 if (size % PAGE_SIZE != 0)
1c79356b
A
4880 return KERN_INVALID_ARGUMENT;
4881
b0d623f7
A
4882 npages = (unsigned int) (size / PAGE_SIZE);
4883 if (npages != size / PAGE_SIZE) {
4884 /* 32-bit overflow */
4885 return KERN_INVALID_ARGUMENT;
4886 }
1c79356b 4887
1c79356b
A
4888 /*
4889 * Obtain a pointer to a subset of the free
4890 * list large enough to satisfy the request;
4891 * the region will be physically contiguous.
4892 */
b0d623f7 4893 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
e5568f75 4894
2d21ac55 4895 if (pages == VM_PAGE_NULL)
1c79356b 4896 return KERN_NO_SPACE;
1c79356b 4897 /*
2d21ac55 4898 * determine need for wakeups
1c79356b 4899 */
2d21ac55 4900 if ((vm_page_free_count < vm_page_free_min) ||
316670eb
A
4901 ((vm_page_free_count < vm_page_free_target) &&
4902 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4903 thread_wakeup((event_t) &vm_page_free_wanted);
2d21ac55 4904
6d2010ae
A
4905 VM_CHECK_MEMORYSTATUS;
4906
1c79356b
A
4907 /*
4908 * The CPM pages should now be available and
4909 * ordered by ascending physical address.
4910 */
4911 assert(vm_page_verify_contiguous(pages, npages));
4912
4913 *list = pages;
4914 return KERN_SUCCESS;
4915}
6d2010ae
A
4916
4917
4918unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4919
4920/*
4921 * when working on a 'run' of pages, it is necessary to hold
4922 * the vm_page_queue_lock (a hot global lock) for certain operations
4923 * on the page... however, the majority of the work can be done
4924 * while merely holding the object lock... in fact there are certain
4925 * collections of pages that don't require any work brokered by the
4926 * vm_page_queue_lock... to mitigate the time spent behind the global
4927 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4928 * while doing all of the work that doesn't require the vm_page_queue_lock...
4929 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4930 * necessary work for each page... we will grab the busy bit on the page
4931 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4932 * if it can't immediately take the vm_page_queue_lock in order to compete
4933 * for the locks in the same order that vm_pageout_scan takes them.
4934 * the operation names are modeled after the names of the routines that
4935 * need to be called in order to make the changes very obvious in the
4936 * original loop
4937 */
4938
4939void
4940vm_page_do_delayed_work(
4941 vm_object_t object,
3e170ce0 4942 vm_tag_t tag,
6d2010ae
A
4943 struct vm_page_delayed_work *dwp,
4944 int dw_count)
4945{
4946 int j;
4947 vm_page_t m;
4948 vm_page_t local_free_q = VM_PAGE_NULL;
6d2010ae
A
4949
4950 /*
4951 * pageout_scan takes the vm_page_lock_queues first
4952 * then tries for the object lock... to avoid what
4953 * is effectively a lock inversion, we'll go to the
4954 * trouble of taking them in that same order... otherwise
4955 * if this object contains the majority of the pages resident
4956 * in the UBC (or a small set of large objects actively being
4957 * worked on contain the majority of the pages), we could
4958 * cause the pageout_scan thread to 'starve' in its attempt
4959 * to find pages to move to the free queue, since it has to
4960 * successfully acquire the object lock of any candidate page
4961 * before it can steal/clean it.
4962 */
4963 if (!vm_page_trylockspin_queues()) {
4964 vm_object_unlock(object);
4965
4966 vm_page_lockspin_queues();
4967
4968 for (j = 0; ; j++) {
4969 if (!vm_object_lock_avoid(object) &&
4970 _vm_object_lock_try(object))
4971 break;
4972 vm_page_unlock_queues();
4973 mutex_pause(j);
4974 vm_page_lockspin_queues();
4975 }
6d2010ae
A
4976 }
4977 for (j = 0; j < dw_count; j++, dwp++) {
4978
4979 m = dwp->dw_m;
4980
6d2010ae
A
4981 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4982 vm_pageout_throttle_up(m);
fe8ab488
A
4983#if CONFIG_PHANTOM_CACHE
4984 if (dwp->dw_mask & DW_vm_phantom_cache_update)
4985 vm_phantom_cache_update(m);
4986#endif
6d2010ae 4987 if (dwp->dw_mask & DW_vm_page_wire)
3e170ce0 4988 vm_page_wire(m, tag, FALSE);
6d2010ae
A
4989 else if (dwp->dw_mask & DW_vm_page_unwire) {
4990 boolean_t queueit;
4991
fe8ab488 4992 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
6d2010ae
A
4993
4994 vm_page_unwire(m, queueit);
4995 }
4996 if (dwp->dw_mask & DW_vm_page_free) {
4997 vm_page_free_prepare_queues(m);
4998
4999 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
5000 /*
5001 * Add this page to our list of reclaimed pages,
5002 * to be freed later.
5003 */
5004 m->pageq.next = (queue_entry_t) local_free_q;
5005 local_free_q = m;
5006 } else {
5007 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
5008 vm_page_deactivate_internal(m, FALSE);
5009 else if (dwp->dw_mask & DW_vm_page_activate) {
5010 if (m->active == FALSE) {
5011 vm_page_activate(m);
5012 }
5013 }
5014 else if (dwp->dw_mask & DW_vm_page_speculate)
5015 vm_page_speculate(m, TRUE);
316670eb
A
5016 else if (dwp->dw_mask & DW_enqueue_cleaned) {
5017 /*
5018 * if we didn't hold the object lock and did this,
5019 * we might disconnect the page, then someone might
5020 * soft fault it back in, then we would put it on the
5021 * cleaned queue, and so we would have a referenced (maybe even dirty)
5022 * page on that queue, which we don't want
5023 */
5024 int refmod_state = pmap_disconnect(m->phys_page);
5025
5026 if ((refmod_state & VM_MEM_REFERENCED)) {
5027 /*
5028 * this page has been touched since it got cleaned; let's activate it
5029 * if it hasn't already been
5030 */
5031 vm_pageout_enqueued_cleaned++;
5032 vm_pageout_cleaned_reactivated++;
5033 vm_pageout_cleaned_commit_reactivated++;
5034
5035 if (m->active == FALSE)
5036 vm_page_activate(m);
5037 } else {
5038 m->reference = FALSE;
5039 vm_page_enqueue_cleaned(m);
5040 }
5041 }
6d2010ae
A
5042 else if (dwp->dw_mask & DW_vm_page_lru)
5043 vm_page_lru(m);
316670eb
A
5044 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
5045 if ( !m->pageout_queue)
3e170ce0 5046 vm_page_queues_remove(m);
316670eb 5047 }
6d2010ae
A
5048 if (dwp->dw_mask & DW_set_reference)
5049 m->reference = TRUE;
5050 else if (dwp->dw_mask & DW_clear_reference)
5051 m->reference = FALSE;
5052
5053 if (dwp->dw_mask & DW_move_page) {
316670eb 5054 if ( !m->pageout_queue) {
3e170ce0 5055 vm_page_queues_remove(m);
6d2010ae 5056
316670eb 5057 assert(m->object != kernel_object);
6d2010ae 5058
3e170ce0 5059 vm_page_enqueue_inactive(m, FALSE);
316670eb 5060 }
6d2010ae
A
5061 }
5062 if (dwp->dw_mask & DW_clear_busy)
5063 m->busy = FALSE;
5064
5065 if (dwp->dw_mask & DW_PAGE_WAKEUP)
5066 PAGE_WAKEUP(m);
5067 }
5068 }
5069 vm_page_unlock_queues();
5070
5071 if (local_free_q)
5072 vm_page_free_list(local_free_q, TRUE);
5073
5074 VM_CHECK_MEMORYSTATUS;
5075
5076}
5077
0b4c1975
A
5078kern_return_t
5079vm_page_alloc_list(
5080 int page_count,
5081 int flags,
5082 vm_page_t *list)
5083{
5084 vm_page_t lo_page_list = VM_PAGE_NULL;
5085 vm_page_t mem;
5086 int i;
5087
5088 if ( !(flags & KMA_LOMEM))
5089 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
5090
5091 for (i = 0; i < page_count; i++) {
5092
5093 mem = vm_page_grablo();
5094
5095 if (mem == VM_PAGE_NULL) {
5096 if (lo_page_list)
5097 vm_page_free_list(lo_page_list, FALSE);
5098
5099 *list = VM_PAGE_NULL;
5100
5101 return (KERN_RESOURCE_SHORTAGE);
5102 }
5103 mem->pageq.next = (queue_entry_t) lo_page_list;
5104 lo_page_list = mem;
5105 }
5106 *list = lo_page_list;
5107
5108 return (KERN_SUCCESS);
5109}
5110
5111void
5112vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
5113{
5114 page->offset = offset;
5115}
5116
5117vm_page_t
5118vm_page_get_next(vm_page_t page)
5119{
5120 return ((vm_page_t) page->pageq.next);
5121}
5122
5123vm_object_offset_t
5124vm_page_get_offset(vm_page_t page)
5125{
5126 return (page->offset);
5127}
5128
5129ppnum_t
5130vm_page_get_phys_page(vm_page_t page)
5131{
5132 return (page->phys_page);
5133}
5134
5135
b0d623f7
A
5136/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5137
d1ecb069
A
5138#if HIBERNATION
5139
b0d623f7
A
5140static vm_page_t hibernate_gobble_queue;
5141
0b4c1975 5142static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
39236c6e 5143static int hibernate_flush_dirty_pages(int);
0b4c1975 5144static int hibernate_flush_queue(queue_head_t *, int);
0b4c1975
A
5145
5146void hibernate_flush_wait(void);
5147void hibernate_mark_in_progress(void);
5148void hibernate_clear_in_progress(void);
5149
39236c6e
A
5150void hibernate_free_range(int, int);
5151void hibernate_hash_insert_page(vm_page_t);
5152uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
5153void hibernate_rebuild_vm_structs(void);
5154uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
5155ppnum_t hibernate_lookup_paddr(unsigned int);
0b4c1975
A
5156
5157struct hibernate_statistics {
5158 int hibernate_considered;
5159 int hibernate_reentered_on_q;
5160 int hibernate_found_dirty;
5161 int hibernate_skipped_cleaning;
5162 int hibernate_skipped_transient;
5163 int hibernate_skipped_precious;
39236c6e 5164 int hibernate_skipped_external;
0b4c1975
A
5165 int hibernate_queue_nolock;
5166 int hibernate_queue_paused;
5167 int hibernate_throttled;
5168 int hibernate_throttle_timeout;
5169 int hibernate_drained;
5170 int hibernate_drain_timeout;
5171 int cd_lock_failed;
5172 int cd_found_precious;
5173 int cd_found_wired;
5174 int cd_found_busy;
5175 int cd_found_unusual;
5176 int cd_found_cleaning;
5177 int cd_found_laundry;
5178 int cd_found_dirty;
39236c6e 5179 int cd_found_xpmapped;
8a3053a0 5180 int cd_skipped_xpmapped;
0b4c1975
A
5181 int cd_local_free;
5182 int cd_total_free;
5183 int cd_vm_page_wire_count;
39236c6e 5184 int cd_vm_struct_pages_unneeded;
0b4c1975
A
5185 int cd_pages;
5186 int cd_discarded;
5187 int cd_count_wire;
5188} hibernate_stats;
5189
5190
8a3053a0
A
5191/*
5192 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
5193 * so that we don't overrun the estimated image size, which would
5194 * result in a hibernation failure.
5195 */
5196#define HIBERNATE_XPMAPPED_LIMIT 40000
5197
0b4c1975
A
5198
5199static int
5200hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
5201{
5202 wait_result_t wait_result;
5203
5204 vm_page_lock_queues();
5205
39236c6e 5206 while ( !queue_empty(&q->pgo_pending) ) {
0b4c1975
A
5207
5208 q->pgo_draining = TRUE;
5209
5210 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
5211
5212 vm_page_unlock_queues();
5213
5214 wait_result = thread_block(THREAD_CONTINUE_NULL);
5215
39236c6e 5216 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
0b4c1975 5217 hibernate_stats.hibernate_drain_timeout++;
39236c6e
A
5218
5219 if (q == &vm_pageout_queue_external)
5220 return (0);
5221
0b4c1975
A
5222 return (1);
5223 }
5224 vm_page_lock_queues();
5225
5226 hibernate_stats.hibernate_drained++;
5227 }
5228 vm_page_unlock_queues();
5229
5230 return (0);
5231}
5232
0b4c1975 5233
39236c6e
A
5234boolean_t hibernate_skip_external = FALSE;
5235
0b4c1975
A
5236static int
5237hibernate_flush_queue(queue_head_t *q, int qcount)
5238{
5239 vm_page_t m;
5240 vm_object_t l_object = NULL;
5241 vm_object_t m_object = NULL;
5242 int refmod_state = 0;
5243 int try_failed_count = 0;
5244 int retval = 0;
5245 int current_run = 0;
5246 struct vm_pageout_queue *iq;
5247 struct vm_pageout_queue *eq;
5248 struct vm_pageout_queue *tq;
5249
5250
5251 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
5252
5253 iq = &vm_pageout_queue_internal;
5254 eq = &vm_pageout_queue_external;
5255
5256 vm_page_lock_queues();
5257
5258 while (qcount && !queue_empty(q)) {
5259
5260 if (current_run++ == 1000) {
5261 if (hibernate_should_abort()) {
5262 retval = 1;
5263 break;
5264 }
5265 current_run = 0;
5266 }
5267
5268 m = (vm_page_t) queue_first(q);
5269 m_object = m->object;
5270
5271 /*
5272 * check to see if we currently are working
5273 * with the same object... if so, we've
5274 * already got the lock
5275 */
5276 if (m_object != l_object) {
5277 /*
5278 * the object associated with candidate page is
5279 * different from the one we were just working
5280 * with... dump the lock if we still own it
5281 */
5282 if (l_object != NULL) {
5283 vm_object_unlock(l_object);
5284 l_object = NULL;
5285 }
5286 /*
5287 * Try to lock object; since we've alread got the
5288 * page queues lock, we can only 'try' for this one.
5289 * if the 'try' fails, we need to do a mutex_pause
5290 * to allow the owner of the object lock a chance to
5291 * run...
5292 */
5293 if ( !vm_object_lock_try_scan(m_object)) {
5294
5295 if (try_failed_count > 20) {
5296 hibernate_stats.hibernate_queue_nolock++;
5297
5298 goto reenter_pg_on_q;
5299 }
0b4c1975
A
5300
5301 vm_page_unlock_queues();
5302 mutex_pause(try_failed_count++);
5303 vm_page_lock_queues();
5304
5305 hibernate_stats.hibernate_queue_paused++;
5306 continue;
5307 } else {
5308 l_object = m_object;
0b4c1975
A
5309 }
5310 }
316670eb 5311 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
0b4c1975
A
5312 /*
5313 * page is not to be cleaned
5314 * put it back on the head of its queue
5315 */
5316 if (m->cleaning)
5317 hibernate_stats.hibernate_skipped_cleaning++;
5318 else
5319 hibernate_stats.hibernate_skipped_transient++;
5320
5321 goto reenter_pg_on_q;
5322 }
0b4c1975
A
5323 if (m_object->copy == VM_OBJECT_NULL) {
5324 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
5325 /*
5326 * let the normal hibernate image path
5327 * deal with these
5328 */
5329 goto reenter_pg_on_q;
5330 }
5331 }
5332 if ( !m->dirty && m->pmapped) {
5333 refmod_state = pmap_get_refmod(m->phys_page);
5334
316670eb
A
5335 if ((refmod_state & VM_MEM_MODIFIED)) {
5336 SET_PAGE_DIRTY(m, FALSE);
5337 }
0b4c1975
A
5338 } else
5339 refmod_state = 0;
5340
5341 if ( !m->dirty) {
5342 /*
5343 * page is not to be cleaned
5344 * put it back on the head of its queue
5345 */
5346 if (m->precious)
5347 hibernate_stats.hibernate_skipped_precious++;
5348
5349 goto reenter_pg_on_q;
5350 }
39236c6e
A
5351
5352 if (hibernate_skip_external == TRUE && !m_object->internal) {
5353
5354 hibernate_stats.hibernate_skipped_external++;
5355
5356 goto reenter_pg_on_q;
5357 }
0b4c1975
A
5358 tq = NULL;
5359
5360 if (m_object->internal) {
5361 if (VM_PAGE_Q_THROTTLED(iq))
5362 tq = iq;
5363 } else if (VM_PAGE_Q_THROTTLED(eq))
5364 tq = eq;
5365
5366 if (tq != NULL) {
5367 wait_result_t wait_result;
5368 int wait_count = 5;
5369
5370 if (l_object != NULL) {
5371 vm_object_unlock(l_object);
5372 l_object = NULL;
5373 }
0b4c1975 5374
0b4c1975
A
5375 while (retval == 0) {
5376
39236c6e
A
5377 tq->pgo_throttled = TRUE;
5378
0b4c1975
A
5379 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
5380
316670eb 5381 vm_page_unlock_queues();
0b4c1975 5382
316670eb 5383 wait_result = thread_block(THREAD_CONTINUE_NULL);
0b4c1975
A
5384
5385 vm_page_lock_queues();
5386
39236c6e
A
5387 if (wait_result != THREAD_TIMED_OUT)
5388 break;
5389 if (!VM_PAGE_Q_THROTTLED(tq))
5390 break;
5391
0b4c1975
A
5392 if (hibernate_should_abort())
5393 retval = 1;
5394
0b4c1975 5395 if (--wait_count == 0) {
39236c6e 5396
316670eb 5397 hibernate_stats.hibernate_throttle_timeout++;
39236c6e
A
5398
5399 if (tq == eq) {
5400 hibernate_skip_external = TRUE;
5401 break;
5402 }
316670eb
A
5403 retval = 1;
5404 }
0b4c1975
A
5405 }
5406 if (retval)
5407 break;
5408
5409 hibernate_stats.hibernate_throttled++;
5410
5411 continue;
5412 }
316670eb
A
5413 /*
5414 * we've already factored out pages in the laundry which
5415 * means this page can't be on the pageout queue so it's
3e170ce0 5416 * safe to do the vm_page_queues_remove
316670eb
A
5417 */
5418 assert(!m->pageout_queue);
5419
3e170ce0 5420 vm_page_queues_remove(m);
0b4c1975 5421
fe8ab488
A
5422 if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
5423 pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
39236c6e 5424
3e170ce0 5425 (void)vm_pageout_cluster(m, FALSE, FALSE, FALSE);
0b4c1975
A
5426
5427 hibernate_stats.hibernate_found_dirty++;
5428
5429 goto next_pg;
5430
5431reenter_pg_on_q:
5432 queue_remove(q, m, vm_page_t, pageq);
5433 queue_enter(q, m, vm_page_t, pageq);
5434
5435 hibernate_stats.hibernate_reentered_on_q++;
5436next_pg:
5437 hibernate_stats.hibernate_considered++;
5438
5439 qcount--;
5440 try_failed_count = 0;
5441 }
5442 if (l_object != NULL) {
5443 vm_object_unlock(l_object);
5444 l_object = NULL;
5445 }
0b4c1975
A
5446
5447 vm_page_unlock_queues();
5448
5449 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5450
5451 return (retval);
5452}
5453
5454
5455static int
39236c6e 5456hibernate_flush_dirty_pages(int pass)
0b4c1975
A
5457{
5458 struct vm_speculative_age_q *aq;
5459 uint32_t i;
5460
0b4c1975
A
5461 if (vm_page_local_q) {
5462 for (i = 0; i < vm_page_local_q_count; i++)
5463 vm_page_reactivate_local(i, TRUE, FALSE);
5464 }
5465
5466 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5467 int qcount;
5468 vm_page_t m;
5469
5470 aq = &vm_page_queue_speculative[i];
5471
5472 if (queue_empty(&aq->age_q))
5473 continue;
5474 qcount = 0;
5475
5476 vm_page_lockspin_queues();
5477
5478 queue_iterate(&aq->age_q,
5479 m,
5480 vm_page_t,
5481 pageq)
5482 {
5483 qcount++;
5484 }
5485 vm_page_unlock_queues();
5486
5487 if (qcount) {
5488 if (hibernate_flush_queue(&aq->age_q, qcount))
5489 return (1);
5490 }
5491 }
316670eb 5492 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
0b4c1975 5493 return (1);
316670eb
A
5494 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5495 return (1);
5496 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
0b4c1975 5497 return (1);
0b4c1975
A
5498 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5499 return (1);
0b4c1975 5500
39236c6e
A
5501 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5502 vm_compressor_record_warmup_start();
5503
5504 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5505 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5506 vm_compressor_record_warmup_end();
5507 return (1);
5508 }
5509 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5510 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5511 vm_compressor_record_warmup_end();
5512 return (1);
5513 }
5514 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5515 vm_compressor_record_warmup_end();
5516
5517 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5518 return (1);
5519
5520 return (0);
5521}
0b4c1975 5522
0b4c1975 5523
fe8ab488
A
5524void
5525hibernate_reset_stats()
5526{
5527 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5528}
5529
5530
0b4c1975
A
5531int
5532hibernate_flush_memory()
5533{
5534 int retval;
5535
5536 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5537
39236c6e
A
5538 hibernate_cleaning_in_progress = TRUE;
5539 hibernate_skip_external = FALSE;
5540
5541 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5542
5543 if (COMPRESSED_PAGER_IS_ACTIVE) {
0b4c1975 5544
39236c6e 5545 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
0b4c1975 5546
39236c6e
A
5547 vm_compressor_flush();
5548
5549 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
39236c6e 5550 }
fe8ab488 5551 if (consider_buffer_cache_collect != NULL) {
39236c6e
A
5552 unsigned int orig_wire_count;
5553
5554 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5555 orig_wire_count = vm_page_wire_count;
0b4c1975 5556
0b4c1975 5557 (void)(*consider_buffer_cache_collect)(1);
7ddcb079 5558 consider_zone_gc(TRUE);
0b4c1975 5559
39236c6e
A
5560 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5561
5562 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
0b4c1975
A
5563 }
5564 }
39236c6e
A
5565 hibernate_cleaning_in_progress = FALSE;
5566
0b4c1975
A
5567 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5568
39236c6e
A
5569 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5570 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5571
5572
0b4c1975
A
5573 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5574 hibernate_stats.hibernate_considered,
5575 hibernate_stats.hibernate_reentered_on_q,
5576 hibernate_stats.hibernate_found_dirty);
39236c6e 5577 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
0b4c1975
A
5578 hibernate_stats.hibernate_skipped_cleaning,
5579 hibernate_stats.hibernate_skipped_transient,
5580 hibernate_stats.hibernate_skipped_precious,
39236c6e 5581 hibernate_stats.hibernate_skipped_external,
0b4c1975
A
5582 hibernate_stats.hibernate_queue_nolock);
5583 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5584 hibernate_stats.hibernate_queue_paused,
5585 hibernate_stats.hibernate_throttled,
5586 hibernate_stats.hibernate_throttle_timeout,
5587 hibernate_stats.hibernate_drained,
5588 hibernate_stats.hibernate_drain_timeout);
5589
5590 return (retval);
5591}
5592
6d2010ae 5593
b0d623f7
A
5594static void
5595hibernate_page_list_zero(hibernate_page_list_t *list)
5596{
5597 uint32_t bank;
5598 hibernate_bitmap_t * bitmap;
5599
5600 bitmap = &list->bank_bitmap[0];
5601 for (bank = 0; bank < list->bank_count; bank++)
5602 {
5603 uint32_t last_bit;
5604
5605 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5606 // set out-of-bound bits at end of bitmap.
5607 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5608 if (last_bit)
5609 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
5610
5611 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5612 }
5613}
5614
b0d623f7
A
5615void
5616hibernate_free_gobble_pages(void)
5617{
5618 vm_page_t m, next;
5619 uint32_t count = 0;
5620
5621 m = (vm_page_t) hibernate_gobble_queue;
5622 while(m)
5623 {
5624 next = (vm_page_t) m->pageq.next;
5625 vm_page_free(m);
5626 count++;
5627 m = next;
5628 }
5629 hibernate_gobble_queue = VM_PAGE_NULL;
5630
5631 if (count)
5632 HIBLOG("Freed %d pages\n", count);
5633}
5634
5635static boolean_t
db609669 5636hibernate_consider_discard(vm_page_t m, boolean_t preflight)
b0d623f7
A
5637{
5638 vm_object_t object = NULL;
5639 int refmod_state;
5640 boolean_t discard = FALSE;
5641
5642 do
5643 {
0b4c1975 5644 if (m->private)
b0d623f7
A
5645 panic("hibernate_consider_discard: private");
5646
0b4c1975 5647 if (!vm_object_lock_try(m->object)) {
db609669 5648 if (!preflight) hibernate_stats.cd_lock_failed++;
b0d623f7 5649 break;
0b4c1975 5650 }
b0d623f7
A
5651 object = m->object;
5652
0b4c1975 5653 if (VM_PAGE_WIRED(m)) {
db609669 5654 if (!preflight) hibernate_stats.cd_found_wired++;
b0d623f7 5655 break;
0b4c1975
A
5656 }
5657 if (m->precious) {
db609669 5658 if (!preflight) hibernate_stats.cd_found_precious++;
b0d623f7 5659 break;
0b4c1975
A
5660 }
5661 if (m->busy || !object->alive) {
b0d623f7
A
5662 /*
5663 * Somebody is playing with this page.
5664 */
db609669 5665 if (!preflight) hibernate_stats.cd_found_busy++;
6d2010ae 5666 break;
0b4c1975
A
5667 }
5668 if (m->absent || m->unusual || m->error) {
b0d623f7
A
5669 /*
5670 * If it's unusual in anyway, ignore it
5671 */
db609669 5672 if (!preflight) hibernate_stats.cd_found_unusual++;
b0d623f7 5673 break;
0b4c1975
A
5674 }
5675 if (m->cleaning) {
db609669 5676 if (!preflight) hibernate_stats.cd_found_cleaning++;
b0d623f7 5677 break;
0b4c1975 5678 }
316670eb 5679 if (m->laundry) {
db609669 5680 if (!preflight) hibernate_stats.cd_found_laundry++;
b0d623f7 5681 break;
0b4c1975 5682 }
b0d623f7
A
5683 if (!m->dirty)
5684 {
5685 refmod_state = pmap_get_refmod(m->phys_page);
5686
5687 if (refmod_state & VM_MEM_REFERENCED)
5688 m->reference = TRUE;
316670eb
A
5689 if (refmod_state & VM_MEM_MODIFIED) {
5690 SET_PAGE_DIRTY(m, FALSE);
5691 }
b0d623f7
A
5692 }
5693
5694 /*
5695 * If it's clean or purgeable we can discard the page on wakeup.
5696 */
5697 discard = (!m->dirty)
5698 || (VM_PURGABLE_VOLATILE == object->purgable)
0b4c1975
A
5699 || (VM_PURGABLE_EMPTY == object->purgable);
5700
39236c6e
A
5701
5702 if (discard == FALSE) {
5703 if (!preflight)
5704 hibernate_stats.cd_found_dirty++;
8a3053a0
A
5705 } else if (m->xpmapped && m->reference && !object->internal) {
5706 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
5707 if (!preflight)
5708 hibernate_stats.cd_found_xpmapped++;
5709 discard = FALSE;
5710 } else {
5711 if (!preflight)
5712 hibernate_stats.cd_skipped_xpmapped++;
5713 }
39236c6e 5714 }
b0d623f7
A
5715 }
5716 while (FALSE);
5717
5718 if (object)
5719 vm_object_unlock(object);
5720
5721 return (discard);
5722}
5723
5724
5725static void
5726hibernate_discard_page(vm_page_t m)
5727{
5728 if (m->absent || m->unusual || m->error)
5729 /*
5730 * If it's unusual in anyway, ignore
5731 */
5732 return;
5733
fe8ab488 5734#if MACH_ASSERT || DEBUG
316670eb
A
5735 vm_object_t object = m->object;
5736 if (!vm_object_lock_try(m->object))
5737 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5738#else
5739 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5740 makes sure these locks are uncontended before sleep */
fe8ab488 5741#endif /* MACH_ASSERT || DEBUG */
316670eb 5742
b0d623f7
A
5743 if (m->pmapped == TRUE)
5744 {
5745 __unused int refmod_state = pmap_disconnect(m->phys_page);
5746 }
5747
5748 if (m->laundry)
5749 panic("hibernate_discard_page(%p) laundry", m);
5750 if (m->private)
5751 panic("hibernate_discard_page(%p) private", m);
5752 if (m->fictitious)
5753 panic("hibernate_discard_page(%p) fictitious", m);
5754
5755 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5756 {
5757 /* object should be on a queue */
5758 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5759 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5760 assert(old_queue);
39236c6e
A
5761 if (m->object->purgeable_when_ripe) {
5762 vm_purgeable_token_delete_first(old_queue);
5763 }
b0d623f7 5764 m->object->purgable = VM_PURGABLE_EMPTY;
fe8ab488
A
5765
5766 /*
5767 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
5768 * accounted in the "volatile" ledger, so no change here.
5769 * We have to update vm_page_purgeable_count, though, since we're
5770 * effectively purging this object.
5771 */
5772 unsigned int delta;
5773 assert(m->object->resident_page_count >= m->object->wired_page_count);
5774 delta = (m->object->resident_page_count - m->object->wired_page_count);
5775 assert(vm_page_purgeable_count >= delta);
5776 assert(delta > 0);
5777 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
b0d623f7
A
5778 }
5779
5780 vm_page_free(m);
316670eb 5781
fe8ab488 5782#if MACH_ASSERT || DEBUG
316670eb 5783 vm_object_unlock(object);
fe8ab488 5784#endif /* MACH_ASSERT || DEBUG */
b0d623f7
A
5785}
5786
db609669
A
5787/*
5788 Grab locks for hibernate_page_list_setall()
5789*/
5790void
5791hibernate_vm_lock_queues(void)
5792{
39236c6e 5793 vm_object_lock(compressor_object);
db609669
A
5794 vm_page_lock_queues();
5795 lck_mtx_lock(&vm_page_queue_free_lock);
5796
5797 if (vm_page_local_q) {
5798 uint32_t i;
5799 for (i = 0; i < vm_page_local_q_count; i++) {
5800 struct vpl *lq;
5801 lq = &vm_page_local_q[i].vpl_un.vpl;
5802 VPL_LOCK(&lq->vpl_lock);
5803 }
5804 }
5805}
5806
5807void
5808hibernate_vm_unlock_queues(void)
5809{
5810 if (vm_page_local_q) {
5811 uint32_t i;
5812 for (i = 0; i < vm_page_local_q_count; i++) {
5813 struct vpl *lq;
5814 lq = &vm_page_local_q[i].vpl_un.vpl;
5815 VPL_UNLOCK(&lq->vpl_lock);
5816 }
5817 }
5818 lck_mtx_unlock(&vm_page_queue_free_lock);
5819 vm_page_unlock_queues();
39236c6e 5820 vm_object_unlock(compressor_object);
db609669
A
5821}
5822
b0d623f7
A
5823/*
5824 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
5825 pages known to VM to not need saving are subtracted.
5826 Wired pages to be saved are present in page_list_wired, pageable in page_list.
5827*/
5828
5829void
5830hibernate_page_list_setall(hibernate_page_list_t * page_list,
5831 hibernate_page_list_t * page_list_wired,
6d2010ae 5832 hibernate_page_list_t * page_list_pal,
39236c6e
A
5833 boolean_t preflight,
5834 boolean_t will_discard,
b0d623f7
A
5835 uint32_t * pagesOut)
5836{
5837 uint64_t start, end, nsec;
5838 vm_page_t m;
39236c6e 5839 vm_page_t next;
b0d623f7 5840 uint32_t pages = page_list->page_count;
39236c6e 5841 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
316670eb 5842 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
b0d623f7
A
5843 uint32_t count_wire = pages;
5844 uint32_t count_discard_active = 0;
5845 uint32_t count_discard_inactive = 0;
316670eb 5846 uint32_t count_discard_cleaned = 0;
b0d623f7
A
5847 uint32_t count_discard_purgeable = 0;
5848 uint32_t count_discard_speculative = 0;
39236c6e 5849 uint32_t count_discard_vm_struct_pages = 0;
b0d623f7
A
5850 uint32_t i;
5851 uint32_t bank;
5852 hibernate_bitmap_t * bitmap;
5853 hibernate_bitmap_t * bitmap_wired;
39236c6e
A
5854 boolean_t discard_all;
5855 boolean_t discard;
b0d623f7 5856
3e170ce0 5857 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
b0d623f7 5858
db609669
A
5859 if (preflight) {
5860 page_list = NULL;
5861 page_list_wired = NULL;
5862 page_list_pal = NULL;
39236c6e
A
5863 discard_all = FALSE;
5864 } else {
5865 discard_all = will_discard;
db609669 5866 }
0b4c1975 5867
fe8ab488 5868#if MACH_ASSERT || DEBUG
39236c6e
A
5869 if (!preflight)
5870 {
316670eb
A
5871 vm_page_lock_queues();
5872 if (vm_page_local_q) {
5873 for (i = 0; i < vm_page_local_q_count; i++) {
5874 struct vpl *lq;
5875 lq = &vm_page_local_q[i].vpl_un.vpl;
5876 VPL_LOCK(&lq->vpl_lock);
5877 }
5878 }
39236c6e 5879 }
fe8ab488 5880#endif /* MACH_ASSERT || DEBUG */
316670eb
A
5881
5882
0b4c1975 5883 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
b0d623f7
A
5884
5885 clock_get_uptime(&start);
5886
db609669
A
5887 if (!preflight) {
5888 hibernate_page_list_zero(page_list);
5889 hibernate_page_list_zero(page_list_wired);
5890 hibernate_page_list_zero(page_list_pal);
5891
5892 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5893 hibernate_stats.cd_pages = pages;
5894 }
0b4c1975 5895
b0d623f7
A
5896 if (vm_page_local_q) {
5897 for (i = 0; i < vm_page_local_q_count; i++)
db609669
A
5898 vm_page_reactivate_local(i, TRUE, !preflight);
5899 }
5900
5901 if (preflight) {
39236c6e 5902 vm_object_lock(compressor_object);
db609669
A
5903 vm_page_lock_queues();
5904 lck_mtx_lock(&vm_page_queue_free_lock);
b0d623f7
A
5905 }
5906
5907 m = (vm_page_t) hibernate_gobble_queue;
39236c6e 5908 while (m)
b0d623f7
A
5909 {
5910 pages--;
5911 count_wire--;
db609669
A
5912 if (!preflight) {
5913 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5914 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5915 }
b0d623f7
A
5916 m = (vm_page_t) m->pageq.next;
5917 }
6d2010ae 5918
db609669 5919 if (!preflight) for( i = 0; i < real_ncpus; i++ )
0b4c1975
A
5920 {
5921 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5922 {
5923 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5924 {
5925 pages--;
5926 count_wire--;
5927 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5928 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5929
5930 hibernate_stats.cd_local_free++;
5931 hibernate_stats.cd_total_free++;
5932 }
5933 }
5934 }
6d2010ae 5935
b0d623f7
A
5936 for( i = 0; i < vm_colors; i++ )
5937 {
5938 queue_iterate(&vm_page_queue_free[i],
5939 m,
5940 vm_page_t,
5941 pageq)
5942 {
5943 pages--;
5944 count_wire--;
db609669
A
5945 if (!preflight) {
5946 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5947 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5948
5949 hibernate_stats.cd_total_free++;
5950 }
b0d623f7
A
5951 }
5952 }
5953
5954 queue_iterate(&vm_lopage_queue_free,
5955 m,
5956 vm_page_t,
5957 pageq)
5958 {
5959 pages--;
5960 count_wire--;
db609669
A
5961 if (!preflight) {
5962 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5963 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5964
5965 hibernate_stats.cd_total_free++;
5966 }
b0d623f7
A
5967 }
5968
39236c6e
A
5969 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5970 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
b0d623f7 5971 {
39236c6e
A
5972 next = (vm_page_t) m->pageq.next;
5973 discard = FALSE;
b0d623f7 5974 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 5975 && hibernate_consider_discard(m, preflight))
b0d623f7 5976 {
db609669 5977 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
b0d623f7 5978 count_discard_inactive++;
39236c6e 5979 discard = discard_all;
b0d623f7
A
5980 }
5981 else
5982 count_throttled++;
5983 count_wire--;
db609669 5984 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
39236c6e
A
5985
5986 if (discard) hibernate_discard_page(m);
5987 m = next;
b0d623f7
A
5988 }
5989
39236c6e
A
5990 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5991 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
b0d623f7 5992 {
39236c6e
A
5993 next = (vm_page_t) m->pageq.next;
5994 discard = FALSE;
b0d623f7 5995 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 5996 && hibernate_consider_discard(m, preflight))
b0d623f7 5997 {
db609669 5998 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
b0d623f7
A
5999 if (m->dirty)
6000 count_discard_purgeable++;
6001 else
6002 count_discard_inactive++;
39236c6e 6003 discard = discard_all;
b0d623f7
A
6004 }
6005 else
39236c6e 6006 count_anonymous++;
b0d623f7 6007 count_wire--;
db609669 6008 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
39236c6e
A
6009 if (discard) hibernate_discard_page(m);
6010 m = next;
b0d623f7
A
6011 }
6012
8a3053a0
A
6013 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
6014 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
b0d623f7 6015 {
39236c6e
A
6016 next = (vm_page_t) m->pageq.next;
6017 discard = FALSE;
b0d623f7 6018 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 6019 && hibernate_consider_discard(m, preflight))
b0d623f7 6020 {
db609669 6021 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
b0d623f7
A
6022 if (m->dirty)
6023 count_discard_purgeable++;
6024 else
8a3053a0 6025 count_discard_cleaned++;
39236c6e 6026 discard = discard_all;
b0d623f7
A
6027 }
6028 else
8a3053a0 6029 count_cleaned++;
b0d623f7 6030 count_wire--;
db609669 6031 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
39236c6e
A
6032 if (discard) hibernate_discard_page(m);
6033 m = next;
b0d623f7
A
6034 }
6035
8a3053a0
A
6036 m = (vm_page_t) queue_first(&vm_page_queue_active);
6037 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
6038 {
6039 next = (vm_page_t) m->pageq.next;
6040 discard = FALSE;
6041 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
6042 && hibernate_consider_discard(m, preflight))
6043 {
6044 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
6045 if (m->dirty)
6046 count_discard_purgeable++;
6047 else
6048 count_discard_active++;
6049 discard = discard_all;
6050 }
6051 else
6052 count_active++;
6053 count_wire--;
6054 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6055 if (discard) hibernate_discard_page(m);
6056 m = next;
6057 }
6058
6059 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6060 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
316670eb 6061 {
39236c6e
A
6062 next = (vm_page_t) m->pageq.next;
6063 discard = FALSE;
316670eb 6064 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 6065 && hibernate_consider_discard(m, preflight))
316670eb 6066 {
db609669 6067 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
316670eb
A
6068 if (m->dirty)
6069 count_discard_purgeable++;
6070 else
8a3053a0 6071 count_discard_inactive++;
39236c6e 6072 discard = discard_all;
316670eb
A
6073 }
6074 else
8a3053a0 6075 count_inactive++;
316670eb 6076 count_wire--;
db609669 6077 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
39236c6e
A
6078 if (discard) hibernate_discard_page(m);
6079 m = next;
316670eb
A
6080 }
6081
b0d623f7
A
6082 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6083 {
39236c6e
A
6084 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
6085 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
6086 {
6087 next = (vm_page_t) m->pageq.next;
6088 discard = FALSE;
6089 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6090 && hibernate_consider_discard(m, preflight))
6091 {
6092 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
6093 count_discard_speculative++;
6094 discard = discard_all;
6095 }
6096 else
6097 count_speculative++;
6098 count_wire--;
6099 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6100 if (discard) hibernate_discard_page(m);
6101 m = next;
6102 }
b0d623f7
A
6103 }
6104
39236c6e
A
6105 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
6106 {
6107 count_compressor++;
6108 count_wire--;
6109 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6110 }
6111
6112 if (preflight == FALSE && discard_all == TRUE) {
6113 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6114
6115 HIBLOG("hibernate_teardown started\n");
6116 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
6117 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
6118
6119 pages -= count_discard_vm_struct_pages;
6120 count_wire -= count_discard_vm_struct_pages;
6121
6122 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
6123
6124 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
b0d623f7
A
6125 }
6126
db609669
A
6127 if (!preflight) {
6128 // pull wired from hibernate_bitmap
6129 bitmap = &page_list->bank_bitmap[0];
6130 bitmap_wired = &page_list_wired->bank_bitmap[0];
6131 for (bank = 0; bank < page_list->bank_count; bank++)
6132 {
6133 for (i = 0; i < bitmap->bitmapwords; i++)
6134 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
6135 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
6136 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
6137 }
b0d623f7
A
6138 }
6139
6140 // machine dependent adjustments
db609669 6141 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
b0d623f7 6142
db609669
A
6143 if (!preflight) {
6144 hibernate_stats.cd_count_wire = count_wire;
39236c6e
A
6145 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
6146 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
db609669 6147 }
0b4c1975 6148
b0d623f7
A
6149 clock_get_uptime(&end);
6150 absolutetime_to_nanoseconds(end - start, &nsec);
6151 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
6152
39236c6e
A
6153 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
6154 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
6155 discard_all ? "did" : "could",
316670eb 6156 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
b0d623f7 6157
8a3053a0
A
6158 if (hibernate_stats.cd_skipped_xpmapped)
6159 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
6160
316670eb
A
6161 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
6162
39236c6e
A
6163 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
6164
fe8ab488 6165#if MACH_ASSERT || DEBUG
39236c6e
A
6166 if (!preflight)
6167 {
316670eb
A
6168 if (vm_page_local_q) {
6169 for (i = 0; i < vm_page_local_q_count; i++) {
6170 struct vpl *lq;
6171 lq = &vm_page_local_q[i].vpl_un.vpl;
6172 VPL_UNLOCK(&lq->vpl_lock);
6173 }
6174 }
6175 vm_page_unlock_queues();
39236c6e 6176 }
fe8ab488 6177#endif /* MACH_ASSERT || DEBUG */
0b4c1975 6178
db609669
A
6179 if (preflight) {
6180 lck_mtx_unlock(&vm_page_queue_free_lock);
6181 vm_page_unlock_queues();
39236c6e 6182 vm_object_unlock(compressor_object);
db609669
A
6183 }
6184
0b4c1975 6185 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
b0d623f7
A
6186}
6187
6188void
6189hibernate_page_list_discard(hibernate_page_list_t * page_list)
6190{
6191 uint64_t start, end, nsec;
6192 vm_page_t m;
6193 vm_page_t next;
6194 uint32_t i;
6195 uint32_t count_discard_active = 0;
6196 uint32_t count_discard_inactive = 0;
6197 uint32_t count_discard_purgeable = 0;
316670eb 6198 uint32_t count_discard_cleaned = 0;
b0d623f7
A
6199 uint32_t count_discard_speculative = 0;
6200
39236c6e 6201
fe8ab488 6202#if MACH_ASSERT || DEBUG
316670eb
A
6203 vm_page_lock_queues();
6204 if (vm_page_local_q) {
6205 for (i = 0; i < vm_page_local_q_count; i++) {
6206 struct vpl *lq;
6207 lq = &vm_page_local_q[i].vpl_un.vpl;
6208 VPL_LOCK(&lq->vpl_lock);
6209 }
6210 }
fe8ab488 6211#endif /* MACH_ASSERT || DEBUG */
316670eb 6212
b0d623f7
A
6213 clock_get_uptime(&start);
6214
316670eb
A
6215 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
6216 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
b0d623f7
A
6217 {
6218 next = (vm_page_t) m->pageq.next;
6219 if (hibernate_page_bittst(page_list, m->phys_page))
6220 {
6221 if (m->dirty)
6222 count_discard_purgeable++;
6223 else
6224 count_discard_inactive++;
6225 hibernate_discard_page(m);
6226 }
6227 m = next;
6228 }
6229
6230 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6231 {
6232 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
6233 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
6234 {
6235 next = (vm_page_t) m->pageq.next;
6236 if (hibernate_page_bittst(page_list, m->phys_page))
6237 {
6238 count_discard_speculative++;
6239 hibernate_discard_page(m);
6240 }
6241 m = next;
6242 }
6243 }
6244
6245 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6246 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
6247 {
6248 next = (vm_page_t) m->pageq.next;
6249 if (hibernate_page_bittst(page_list, m->phys_page))
6250 {
6251 if (m->dirty)
6252 count_discard_purgeable++;
6253 else
6254 count_discard_inactive++;
6255 hibernate_discard_page(m);
6256 }
6257 m = next;
6258 }
6259
6260 m = (vm_page_t) queue_first(&vm_page_queue_active);
6261 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
6262 {
6263 next = (vm_page_t) m->pageq.next;
6264 if (hibernate_page_bittst(page_list, m->phys_page))
6265 {
6266 if (m->dirty)
6267 count_discard_purgeable++;
6268 else
6269 count_discard_active++;
6270 hibernate_discard_page(m);
6271 }
6272 m = next;
6273 }
6274
316670eb
A
6275 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
6276 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
6277 {
6278 next = (vm_page_t) m->pageq.next;
6279 if (hibernate_page_bittst(page_list, m->phys_page))
6280 {
6281 if (m->dirty)
6282 count_discard_purgeable++;
6283 else
6284 count_discard_cleaned++;
6285 hibernate_discard_page(m);
6286 }
6287 m = next;
6288 }
6289
fe8ab488 6290#if MACH_ASSERT || DEBUG
316670eb
A
6291 if (vm_page_local_q) {
6292 for (i = 0; i < vm_page_local_q_count; i++) {
6293 struct vpl *lq;
6294 lq = &vm_page_local_q[i].vpl_un.vpl;
6295 VPL_UNLOCK(&lq->vpl_lock);
6296 }
6297 }
6298 vm_page_unlock_queues();
fe8ab488 6299#endif /* MACH_ASSERT || DEBUG */
316670eb 6300
b0d623f7
A
6301 clock_get_uptime(&end);
6302 absolutetime_to_nanoseconds(end - start, &nsec);
316670eb 6303 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
b0d623f7 6304 nsec / 1000000ULL,
316670eb 6305 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
b0d623f7
A
6306}
6307
39236c6e
A
6308boolean_t hibernate_paddr_map_inited = FALSE;
6309boolean_t hibernate_rebuild_needed = FALSE;
6310unsigned int hibernate_teardown_last_valid_compact_indx = -1;
6311vm_page_t hibernate_rebuild_hash_list = NULL;
6312
6313unsigned int hibernate_teardown_found_tabled_pages = 0;
6314unsigned int hibernate_teardown_found_created_pages = 0;
6315unsigned int hibernate_teardown_found_free_pages = 0;
6316unsigned int hibernate_teardown_vm_page_free_count;
6317
6318
6319struct ppnum_mapping {
6320 struct ppnum_mapping *ppnm_next;
6321 ppnum_t ppnm_base_paddr;
6322 unsigned int ppnm_sindx;
6323 unsigned int ppnm_eindx;
6324};
6325
6326struct ppnum_mapping *ppnm_head;
6327struct ppnum_mapping *ppnm_last_found = NULL;
6328
6329
6330void
6331hibernate_create_paddr_map()
6332{
6333 unsigned int i;
6334 ppnum_t next_ppnum_in_run = 0;
6335 struct ppnum_mapping *ppnm = NULL;
6336
6337 if (hibernate_paddr_map_inited == FALSE) {
6338
6339 for (i = 0; i < vm_pages_count; i++) {
6340
6341 if (ppnm)
6342 ppnm->ppnm_eindx = i;
6343
6344 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
6345
6346 ppnm = kalloc(sizeof(struct ppnum_mapping));
6347
6348 ppnm->ppnm_next = ppnm_head;
6349 ppnm_head = ppnm;
6350
6351 ppnm->ppnm_sindx = i;
6352 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
6353 }
6354 next_ppnum_in_run = vm_pages[i].phys_page + 1;
6355 }
6356 ppnm->ppnm_eindx++;
6357
6358 hibernate_paddr_map_inited = TRUE;
6359 }
6360}
6361
6362ppnum_t
6363hibernate_lookup_paddr(unsigned int indx)
6364{
6365 struct ppnum_mapping *ppnm = NULL;
6366
6367 ppnm = ppnm_last_found;
6368
6369 if (ppnm) {
6370 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
6371 goto done;
6372 }
6373 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
6374
6375 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
6376 ppnm_last_found = ppnm;
6377 break;
6378 }
6379 }
6380 if (ppnm == NULL)
6381 panic("hibernate_lookup_paddr of %d failed\n", indx);
6382done:
6383 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
6384}
6385
6386
6387uint32_t
6388hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6389{
6390 addr64_t saddr_aligned;
6391 addr64_t eaddr_aligned;
6392 addr64_t addr;
6393 ppnum_t paddr;
6394 unsigned int mark_as_unneeded_pages = 0;
6395
6396 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
6397 eaddr_aligned = eaddr & ~PAGE_MASK_64;
6398
6399 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
6400
6401 paddr = pmap_find_phys(kernel_pmap, addr);
6402
6403 assert(paddr);
6404
6405 hibernate_page_bitset(page_list, TRUE, paddr);
6406 hibernate_page_bitset(page_list_wired, TRUE, paddr);
6407
6408 mark_as_unneeded_pages++;
6409 }
6410 return (mark_as_unneeded_pages);
6411}
6412
6413
6414void
6415hibernate_hash_insert_page(vm_page_t mem)
6416{
6417 vm_page_bucket_t *bucket;
6418 int hash_id;
6419
15129b1c 6420 assert(mem->hashed);
39236c6e
A
6421 assert(mem->object);
6422 assert(mem->offset != (vm_object_offset_t) -1);
6423
6424 /*
6425 * Insert it into the object_object/offset hash table
6426 */
6427 hash_id = vm_page_hash(mem->object, mem->offset);
6428 bucket = &vm_page_buckets[hash_id];
6429
fe8ab488
A
6430 mem->next_m = bucket->page_list;
6431 bucket->page_list = VM_PAGE_PACK_PTR(mem);
39236c6e
A
6432}
6433
6434
6435void
6436hibernate_free_range(int sindx, int eindx)
6437{
6438 vm_page_t mem;
6439 unsigned int color;
6440
6441 while (sindx < eindx) {
6442 mem = &vm_pages[sindx];
6443
6444 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6445
6446 mem->lopage = FALSE;
6447 mem->free = TRUE;
6448
6449 color = mem->phys_page & vm_color_mask;
6450 queue_enter_first(&vm_page_queue_free[color],
6451 mem,
6452 vm_page_t,
6453 pageq);
6454 vm_page_free_count++;
6455
6456 sindx++;
6457 }
6458}
6459
6460
6461extern void hibernate_rebuild_pmap_structs(void);
6462
6463void
6464hibernate_rebuild_vm_structs(void)
6465{
6466 int cindx, sindx, eindx;
6467 vm_page_t mem, tmem, mem_next;
6468 AbsoluteTime startTime, endTime;
6469 uint64_t nsec;
6470
6471 if (hibernate_rebuild_needed == FALSE)
6472 return;
6473
6474 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6475 HIBLOG("hibernate_rebuild started\n");
6476
6477 clock_get_uptime(&startTime);
6478
6479 hibernate_rebuild_pmap_structs();
6480
6481 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6482 eindx = vm_pages_count;
6483
6484 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6485
6486 mem = &vm_pages[cindx];
6487 /*
6488 * hibernate_teardown_vm_structs leaves the location where
6489 * this vm_page_t must be located in "next".
6490 */
fe8ab488
A
6491 tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
6492 mem->next_m = VM_PAGE_PACK_PTR(NULL);
39236c6e
A
6493
6494 sindx = (int)(tmem - &vm_pages[0]);
6495
6496 if (mem != tmem) {
6497 /*
6498 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6499 * so move it back to its real location
6500 */
6501 *tmem = *mem;
6502 mem = tmem;
6503 }
15129b1c 6504 if (mem->hashed)
39236c6e
A
6505 hibernate_hash_insert_page(mem);
6506 /*
6507 * the 'hole' between this vm_page_t and the previous
6508 * vm_page_t we moved needs to be initialized as
6509 * a range of free vm_page_t's
6510 */
6511 hibernate_free_range(sindx + 1, eindx);
6512
6513 eindx = sindx;
6514 }
6515 if (sindx)
6516 hibernate_free_range(0, sindx);
6517
6518 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6519
6520 /*
15129b1c 6521 * process the list of vm_page_t's that were entered in the hash,
39236c6e
A
6522 * but were not located in the vm_pages arrary... these are
6523 * vm_page_t's that were created on the fly (i.e. fictitious)
6524 */
6525 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
fe8ab488 6526 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
39236c6e 6527
fe8ab488 6528 mem->next_m = VM_PAGE_PACK_PTR(NULL);
39236c6e
A
6529 hibernate_hash_insert_page(mem);
6530 }
6531 hibernate_rebuild_hash_list = NULL;
6532
6533 clock_get_uptime(&endTime);
6534 SUB_ABSOLUTETIME(&endTime, &startTime);
6535 absolutetime_to_nanoseconds(endTime, &nsec);
6536
6537 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6538
6539 hibernate_rebuild_needed = FALSE;
6540
6541 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6542}
6543
6544
6545extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6546
6547uint32_t
6548hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6549{
6550 unsigned int i;
6551 unsigned int compact_target_indx;
6552 vm_page_t mem, mem_next;
6553 vm_page_bucket_t *bucket;
6554 unsigned int mark_as_unneeded_pages = 0;
6555 unsigned int unneeded_vm_page_bucket_pages = 0;
6556 unsigned int unneeded_vm_pages_pages = 0;
6557 unsigned int unneeded_pmap_pages = 0;
6558 addr64_t start_of_unneeded = 0;
6559 addr64_t end_of_unneeded = 0;
6560
6561
6562 if (hibernate_should_abort())
6563 return (0);
6564
6565 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6566 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6567 vm_page_cleaned_count, compressor_object->resident_page_count);
6568
6569 for (i = 0; i < vm_page_bucket_count; i++) {
6570
6571 bucket = &vm_page_buckets[i];
6572
fe8ab488 6573 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
15129b1c 6574 assert(mem->hashed);
39236c6e 6575
fe8ab488 6576 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
39236c6e
A
6577
6578 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
fe8ab488 6579 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
39236c6e
A
6580 hibernate_rebuild_hash_list = mem;
6581 }
6582 }
6583 }
6584 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6585 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6586
6587 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6588
6589 compact_target_indx = 0;
6590
6591 for (i = 0; i < vm_pages_count; i++) {
6592
6593 mem = &vm_pages[i];
6594
6595 if (mem->free) {
6596 unsigned int color;
6597
6598 assert(mem->busy);
6599 assert(!mem->lopage);
6600
6601 color = mem->phys_page & vm_color_mask;
6602
6603 queue_remove(&vm_page_queue_free[color],
6604 mem,
6605 vm_page_t,
6606 pageq);
6607 mem->pageq.next = NULL;
6608 mem->pageq.prev = NULL;
6609
6610 vm_page_free_count--;
6611
6612 hibernate_teardown_found_free_pages++;
6613
6614 if ( !vm_pages[compact_target_indx].free)
6615 compact_target_indx = i;
6616 } else {
6617 /*
6618 * record this vm_page_t's original location
6619 * we need this even if it doesn't get moved
6620 * as an indicator to the rebuild function that
6621 * we don't have to move it
6622 */
fe8ab488 6623 mem->next_m = VM_PAGE_PACK_PTR(mem);
39236c6e
A
6624
6625 if (vm_pages[compact_target_indx].free) {
6626 /*
6627 * we've got a hole to fill, so
6628 * move this vm_page_t to it's new home
6629 */
6630 vm_pages[compact_target_indx] = *mem;
6631 mem->free = TRUE;
6632
6633 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6634 compact_target_indx++;
6635 } else
6636 hibernate_teardown_last_valid_compact_indx = i;
6637 }
6638 }
6639 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6640 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6641 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6642
6643 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6644
6645 if (start_of_unneeded) {
6646 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6647 mark_as_unneeded_pages += unneeded_pmap_pages;
6648 }
6649 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6650
6651 hibernate_rebuild_needed = TRUE;
6652
6653 return (mark_as_unneeded_pages);
6654}
6655
6656
d1ecb069
A
6657#endif /* HIBERNATION */
6658
b0d623f7 6659/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
1c79356b
A
6660
6661#include <mach_vm_debug.h>
6662#if MACH_VM_DEBUG
6663
6664#include <mach_debug/hash_info.h>
6665#include <vm/vm_debug.h>
6666
6667/*
6668 * Routine: vm_page_info
6669 * Purpose:
6670 * Return information about the global VP table.
6671 * Fills the buffer with as much information as possible
6672 * and returns the desired size of the buffer.
6673 * Conditions:
6674 * Nothing locked. The caller should provide
6675 * possibly-pageable memory.
6676 */
6677
6678unsigned int
6679vm_page_info(
6680 hash_info_bucket_t *info,
6681 unsigned int count)
6682{
91447636 6683 unsigned int i;
b0d623f7 6684 lck_spin_t *bucket_lock;
1c79356b
A
6685
6686 if (vm_page_bucket_count < count)
6687 count = vm_page_bucket_count;
6688
6689 for (i = 0; i < count; i++) {
6690 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6691 unsigned int bucket_count = 0;
6692 vm_page_t m;
6693
b0d623f7
A
6694 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6695 lck_spin_lock(bucket_lock);
6696
fe8ab488 6697 for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
1c79356b 6698 bucket_count++;
b0d623f7
A
6699
6700 lck_spin_unlock(bucket_lock);
1c79356b
A
6701
6702 /* don't touch pageable memory while holding locks */
6703 info[i].hib_count = bucket_count;
6704 }
6705
6706 return vm_page_bucket_count;
6707}
6708#endif /* MACH_VM_DEBUG */
15129b1c
A
6709
6710#if VM_PAGE_BUCKETS_CHECK
6711void
6712vm_page_buckets_check(void)
6713{
6714 unsigned int i;
6715 vm_page_t p;
6716 unsigned int p_hash;
6717 vm_page_bucket_t *bucket;
6718 lck_spin_t *bucket_lock;
6719
6720 if (!vm_page_buckets_check_ready) {
6721 return;
6722 }
6723
6724#if HIBERNATION
6725 if (hibernate_rebuild_needed ||
6726 hibernate_rebuild_hash_list) {
6727 panic("BUCKET_CHECK: hibernation in progress: "
6728 "rebuild_needed=%d rebuild_hash_list=%p\n",
6729 hibernate_rebuild_needed,
6730 hibernate_rebuild_hash_list);
6731 }
6732#endif /* HIBERNATION */
6733
6734#if VM_PAGE_FAKE_BUCKETS
6735 char *cp;
6736 for (cp = (char *) vm_page_fake_buckets_start;
6737 cp < (char *) vm_page_fake_buckets_end;
6738 cp++) {
6739 if (*cp != 0x5a) {
6740 panic("BUCKET_CHECK: corruption at %p in fake buckets "
6741 "[0x%llx:0x%llx]\n",
6742 cp,
fe8ab488
A
6743 (uint64_t) vm_page_fake_buckets_start,
6744 (uint64_t) vm_page_fake_buckets_end);
15129b1c
A
6745 }
6746 }
6747#endif /* VM_PAGE_FAKE_BUCKETS */
6748
6749 for (i = 0; i < vm_page_bucket_count; i++) {
6750 bucket = &vm_page_buckets[i];
fe8ab488 6751 if (!bucket->page_list) {
15129b1c
A
6752 continue;
6753 }
6754
6755 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6756 lck_spin_lock(bucket_lock);
fe8ab488 6757 p = VM_PAGE_UNPACK_PTR(bucket->page_list);
15129b1c
A
6758 while (p != VM_PAGE_NULL) {
6759 if (!p->hashed) {
6760 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
6761 "hash %d in bucket %d at %p "
6762 "is not hashed\n",
6763 p, p->object, p->offset,
6764 p_hash, i, bucket);
6765 }
6766 p_hash = vm_page_hash(p->object, p->offset);
6767 if (p_hash != i) {
6768 panic("BUCKET_CHECK: corruption in bucket %d "
6769 "at %p: page %p object %p offset 0x%llx "
6770 "hash %d\n",
6771 i, bucket, p, p->object, p->offset,
6772 p_hash);
6773 }
fe8ab488 6774 p = VM_PAGE_UNPACK_PTR(p->next_m);
15129b1c
A
6775 }
6776 lck_spin_unlock(bucket_lock);
6777 }
6778
6779// printf("BUCKET_CHECK: checked buckets\n");
6780}
6781#endif /* VM_PAGE_BUCKETS_CHECK */
3e170ce0
A
6782
6783/*
6784 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
6785 * local queues if they exist... its the only spot in the system where we add pages
6786 * to those queues... once on those queues, those pages can only move to one of the
6787 * global page queues or the free queues... they NEVER move from local q to local q.
6788 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
6789 * the global vm_page_queue_lock at this point... we still need to take the local lock
6790 * in case this operation is being run on a different CPU then the local queue's identity,
6791 * but we don't have to worry about the page moving to a global queue or becoming wired
6792 * while we're grabbing the local lock since those operations would require the global
6793 * vm_page_queue_lock to be held, and we already own it.
6794 *
6795 * this is why its safe to utilze the wire_count field in the vm_page_t as the local_id...
6796 * 'wired' and local are ALWAYS mutually exclusive conditions.
6797 */
6798void
6799vm_page_queues_remove(vm_page_t mem)
6800{
6801 boolean_t was_pageable;
6802
6803 VM_PAGE_QUEUES_ASSERT(mem, 1);
6804 assert(!mem->pageout_queue);
6805 /*
6806 * if (mem->pageout_queue)
6807 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
6808 * the caller is responsible for determing if the page is on that queue, and if so, must
6809 * either first remove it (it needs both the page queues lock and the object lock to do
6810 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
6811 */
6812 if (mem->local) {
6813 struct vpl *lq;
6814 assert(mem->object != kernel_object);
6815 assert(mem->object != compressor_object);
6816 assert(!mem->inactive && !mem->speculative);
6817 assert(!mem->active && !mem->throttled);
6818 assert(!mem->clean_queue);
6819 assert(!mem->fictitious);
6820 lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
6821 VPL_LOCK(&lq->vpl_lock);
6822 queue_remove(&lq->vpl_queue,
6823 mem, vm_page_t, pageq);
6824 mem->local = FALSE;
6825 mem->local_id = 0;
6826 lq->vpl_count--;
6827 if (mem->object->internal) {
6828 lq->vpl_internal_count--;
6829 } else {
6830 lq->vpl_external_count--;
6831 }
6832 VPL_UNLOCK(&lq->vpl_lock);
6833 was_pageable = FALSE;
6834 }
6835
6836 else if (mem->active) {
6837 assert(mem->object != kernel_object);
6838 assert(mem->object != compressor_object);
6839 assert(!mem->inactive && !mem->speculative);
6840 assert(!mem->clean_queue);
6841 assert(!mem->throttled);
6842 assert(!mem->fictitious);
6843 queue_remove(&vm_page_queue_active,
6844 mem, vm_page_t, pageq);
6845 mem->active = FALSE;
6846 vm_page_active_count--;
6847 was_pageable = TRUE;
6848 }
6849
6850 else if (mem->inactive) {
6851 assert(mem->object != kernel_object);
6852 assert(mem->object != compressor_object);
6853 assert(!mem->active && !mem->speculative);
6854 assert(!mem->throttled);
6855 assert(!mem->fictitious);
6856 vm_page_inactive_count--;
6857 if (mem->clean_queue) {
6858 queue_remove(&vm_page_queue_cleaned,
6859 mem, vm_page_t, pageq);
6860 mem->clean_queue = FALSE;
6861 vm_page_cleaned_count--;
6862 } else {
6863 if (mem->object->internal) {
6864 queue_remove(&vm_page_queue_anonymous,
6865 mem, vm_page_t, pageq);
6866 vm_page_anonymous_count--;
6867 } else {
6868 queue_remove(&vm_page_queue_inactive,
6869 mem, vm_page_t, pageq);
6870 }
6871 vm_purgeable_q_advance_all();
6872 }
6873 mem->inactive = FALSE;
6874 was_pageable = TRUE;
6875 }
6876
6877 else if (mem->throttled) {
6878 assert(mem->object != compressor_object);
6879 assert(!mem->active && !mem->inactive);
6880 assert(!mem->speculative);
6881 assert(!mem->fictitious);
6882 queue_remove(&vm_page_queue_throttled,
6883 mem, vm_page_t, pageq);
6884 mem->throttled = FALSE;
6885 vm_page_throttled_count--;
6886 was_pageable = FALSE;
6887 }
6888
6889 else if (mem->speculative) {
6890 assert(mem->object != compressor_object);
6891 assert(!mem->active && !mem->inactive);
6892 assert(!mem->throttled);
6893 assert(!mem->fictitious);
6894 remque(&mem->pageq);
6895 mem->speculative = FALSE;
6896 vm_page_speculative_count--;
6897 was_pageable = TRUE;
6898 }
6899
6900 else if (mem->pageq.next || mem->pageq.prev) {
6901 was_pageable = FALSE;
6902 panic("vm_page_queues_remove: unmarked page on Q");
6903 } else {
6904 was_pageable = FALSE;
6905 }
6906
6907 mem->pageq.next = NULL;
6908 mem->pageq.prev = NULL;
6909 VM_PAGE_QUEUES_ASSERT(mem, 0);
6910 if (was_pageable) {
6911 if (mem->object->internal) {
6912 vm_page_pageable_internal_count--;
6913 } else {
6914 vm_page_pageable_external_count--;
6915 }
6916 }
6917}
6918
6919void
6920vm_page_remove_internal(vm_page_t page)
6921{
6922 vm_object_t __object = page->object;
6923 if (page == __object->memq_hint) {
6924 vm_page_t __new_hint;
6925 queue_entry_t __qe;
6926 __qe = queue_next(&page->listq);
6927 if (queue_end(&__object->memq, __qe)) {
6928 __qe = queue_prev(&page->listq);
6929 if (queue_end(&__object->memq, __qe)) {
6930 __qe = NULL;
6931 }
6932 }
6933 __new_hint = (vm_page_t) __qe;
6934 __object->memq_hint = __new_hint;
6935 }
6936 queue_remove(&__object->memq, page, vm_page_t, listq);
6937}
6938
6939void
6940vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
6941{
6942 VM_PAGE_QUEUES_ASSERT(mem, 0);
6943 assert(!mem->fictitious);
6944 assert(!mem->laundry);
6945 assert(!mem->pageout_queue);
6946 vm_page_check_pageable_safe(mem);
6947 if (mem->object->internal) {
6948 if (first == TRUE)
6949 queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
6950 else
6951 queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
6952 vm_page_anonymous_count++;
6953 vm_page_pageable_internal_count++;
6954 } else {
6955 if (first == TRUE)
6956 queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq);
6957 else
6958 queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);
6959 vm_page_pageable_external_count++;
6960 }
6961 mem->inactive = TRUE;
6962 vm_page_inactive_count++;
6963 token_new_pagecount++;
6964}
6965
6966/*
6967 * Pages from special kernel objects shouldn't
6968 * be placed on pageable queues.
6969 */
6970void
6971vm_page_check_pageable_safe(vm_page_t page)
6972{
6973 if (page->object == kernel_object) {
6974 panic("vm_page_check_pageable_safe: trying to add page" \
6975 "from kernel object (%p) to pageable queue", kernel_object);
6976 }
6977
6978 if (page->object == compressor_object) {
6979 panic("vm_page_check_pageable_safe: trying to add page" \
6980 "from compressor object (%p) to pageable queue", compressor_object);
6981 }
6982
6983 if (page->object == vm_submap_object) {
6984 panic("vm_page_check_pageable_safe: trying to add page" \
6985 "from submap object (%p) to pageable queue", vm_submap_object);
6986 }
6987}
6988
6989/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
6990 * wired page diagnose
6991 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6992
6993#include <libkern/OSKextLibPrivate.h>
6994
6995vm_allocation_site_t *
6996vm_allocation_sites[VM_KERN_MEMORY_COUNT];
6997
6998vm_tag_t
6999vm_tag_bt(void)
7000{
7001 uintptr_t* frameptr;
7002 uintptr_t* frameptr_next;
7003 uintptr_t retaddr;
7004 uintptr_t kstackb, kstackt;
7005 const vm_allocation_site_t * site;
7006 thread_t cthread;
7007
7008 cthread = current_thread();
7009 if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
7010
7011 kstackb = cthread->kernel_stack;
7012 kstackt = kstackb + kernel_stack_size;
7013
7014 /* Load stack frame pointer (EBP on x86) into frameptr */
7015 frameptr = __builtin_frame_address(0);
7016 site = NULL;
7017 while (frameptr != NULL)
7018 {
7019 /* Verify thread stack bounds */
7020 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
7021
7022 /* Next frame pointer is pointed to by the previous one */
7023 frameptr_next = (uintptr_t*) *frameptr;
7024
7025 /* Pull return address from one spot above the frame pointer */
7026 retaddr = *(frameptr + 1);
7027
7028 if ((retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
7029 {
7030 site = OSKextGetAllocationSiteForCaller(retaddr);
7031 break;
7032 }
7033
7034 frameptr = frameptr_next;
7035 }
7036 return (site ? site->tag : VM_KERN_MEMORY_NONE);
7037}
7038
7039static uint64_t free_tag_bits[256/64];
7040
7041void
7042vm_tag_alloc_locked(vm_allocation_site_t * site)
7043{
7044 vm_tag_t tag;
7045 uint64_t avail;
7046 uint64_t idx;
7047
7048 if (site->tag) return;
7049
7050 idx = 0;
7051 while (TRUE)
7052 {
7053 avail = free_tag_bits[idx];
7054 if (avail)
7055 {
7056 tag = __builtin_clzll(avail);
7057 avail &= ~(1ULL << (63 - tag));
7058 free_tag_bits[idx] = avail;
7059 tag += (idx << 6);
7060 break;
7061 }
7062 idx++;
7063 if (idx >= (sizeof(free_tag_bits) / sizeof(free_tag_bits[0])))
7064 {
7065 tag = VM_KERN_MEMORY_ANY;
7066 break;
7067 }
7068 }
7069 site->tag = tag;
7070 if (VM_KERN_MEMORY_ANY != tag)
7071 {
7072 assert(!vm_allocation_sites[tag]);
7073 vm_allocation_sites[tag] = site;
7074 }
7075}
7076
7077static void
7078vm_tag_free_locked(vm_tag_t tag)
7079{
7080 uint64_t avail;
7081 uint32_t idx;
7082 uint64_t bit;
7083
7084 if (VM_KERN_MEMORY_ANY == tag) return;
7085
7086 idx = (tag >> 6);
7087 avail = free_tag_bits[idx];
7088 tag &= 63;
7089 bit = (1ULL << (63 - tag));
7090 assert(!(avail & bit));
7091 free_tag_bits[idx] = (avail | bit);
7092}
7093
7094static void
7095vm_tag_init(void)
7096{
7097 vm_tag_t tag;
7098 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
7099 {
7100 vm_tag_free_locked(tag);
7101 }
7102}
7103
7104vm_tag_t
7105vm_tag_alloc(vm_allocation_site_t * site)
7106{
7107 vm_tag_t tag;
7108
7109 if (VM_TAG_BT & site->flags)
7110 {
7111 tag = vm_tag_bt();
7112 if (VM_KERN_MEMORY_NONE != tag) return (tag);
7113 }
7114
7115 if (!site->tag)
7116 {
7117 lck_spin_lock(&vm_allocation_sites_lock);
7118 vm_tag_alloc_locked(site);
7119 lck_spin_unlock(&vm_allocation_sites_lock);
7120 }
7121
7122 return (site->tag);
7123}
7124
7125static void
7126vm_page_count_object(mach_memory_info_t * sites, unsigned int __unused num_sites, vm_object_t object)
7127{
7128 if (!object->wired_page_count) return;
7129 if (object != kernel_object)
7130 {
7131 assert(object->wire_tag < num_sites);
7132 sites[object->wire_tag].size += ptoa_64(object->wired_page_count);
7133 }
7134}
7135
7136typedef void (*vm_page_iterate_proc)(mach_memory_info_t * sites,
7137 unsigned int num_sites, vm_object_t object);
7138
7139static void
7140vm_page_iterate_purgeable_objects(mach_memory_info_t * sites, unsigned int num_sites,
7141 vm_page_iterate_proc proc, purgeable_q_t queue,
7142 int group)
7143{
7144 vm_object_t object;
7145
7146 for (object = (vm_object_t) queue_first(&queue->objq[group]);
7147 !queue_end(&queue->objq[group], (queue_entry_t) object);
7148 object = (vm_object_t) queue_next(&object->objq))
7149 {
7150 proc(sites, num_sites, object);
7151 }
7152}
7153
7154static void
7155vm_page_iterate_objects(mach_memory_info_t * sites, unsigned int num_sites,
7156 vm_page_iterate_proc proc)
7157{
7158 purgeable_q_t volatile_q;
7159 queue_head_t * nonvolatile_q;
7160 vm_object_t object;
7161 int group;
7162
7163 lck_spin_lock(&vm_objects_wired_lock);
7164 queue_iterate(&vm_objects_wired,
7165 object,
7166 vm_object_t,
7167 objq)
7168 {
7169 proc(sites, num_sites, object);
7170 }
7171 lck_spin_unlock(&vm_objects_wired_lock);
7172
7173 lck_mtx_lock(&vm_purgeable_queue_lock);
7174 nonvolatile_q = &purgeable_nonvolatile_queue;
7175 for (object = (vm_object_t) queue_first(nonvolatile_q);
7176 !queue_end(nonvolatile_q, (queue_entry_t) object);
7177 object = (vm_object_t) queue_next(&object->objq))
7178 {
7179 proc(sites, num_sites, object);
7180 }
7181
7182 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
7183 vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, 0);
7184
7185 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
7186 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
7187 {
7188 vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
7189 }
7190
7191 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
7192 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
7193 {
7194 vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
7195 }
7196 lck_mtx_unlock(&vm_purgeable_queue_lock);
7197}
7198
7199static uint64_t
7200process_account(mach_memory_info_t * sites, unsigned int __unused num_sites)
7201{
7202 uint64_t found;
7203 unsigned int idx;
7204 vm_allocation_site_t * site;
7205
7206 assert(num_sites >= VM_KERN_MEMORY_COUNT);
7207 found = 0;
7208 for (idx = 0; idx < VM_KERN_MEMORY_COUNT; idx++)
7209 {
7210 found += sites[idx].size;
7211 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
7212 {
7213 sites[idx].site = idx;
7214 sites[idx].flags |= VM_KERN_SITE_TAG;
7215 if (VM_KERN_MEMORY_ZONE == idx) sites[idx].flags |= VM_KERN_SITE_HIDE;
7216 else sites[idx].flags |= VM_KERN_SITE_WIRED;
7217 continue;
7218 }
7219 lck_spin_lock(&vm_allocation_sites_lock);
7220 if ((site = vm_allocation_sites[idx]))
7221 {
7222 if (sites[idx].size)
7223 {
7224 sites[idx].flags |= VM_KERN_SITE_WIRED;
7225 if (VM_TAG_KMOD == (VM_KERN_SITE_TYPE & site->flags))
7226 {
7227 sites[idx].site = OSKextGetKmodIDForSite(site);
7228 sites[idx].flags |= VM_KERN_SITE_KMOD;
7229 }
7230 else
7231 {
7232 sites[idx].site = VM_KERNEL_UNSLIDE(site);
7233 sites[idx].flags |= VM_KERN_SITE_KERNEL;
7234 }
7235 site = NULL;
7236 }
7237 else
7238 {
490019cf
A
7239#if 1
7240 site = NULL;
7241#else
7242 /* this code would free a site with no allocations but can race a new
7243 * allocation being made */
3e170ce0
A
7244 vm_tag_free_locked(site->tag);
7245 site->tag = VM_KERN_MEMORY_NONE;
7246 vm_allocation_sites[idx] = NULL;
7247 if (!(VM_TAG_UNLOAD & site->flags)) site = NULL;
490019cf 7248#endif
3e170ce0
A
7249 }
7250 }
7251 lck_spin_unlock(&vm_allocation_sites_lock);
7252 if (site) OSKextFreeSite(site);
7253 }
7254 return (found);
7255}
7256
7257kern_return_t
7258vm_page_diagnose(mach_memory_info_t * sites, unsigned int num_sites)
7259{
7260 enum { kMaxKernelDepth = 1 };
7261 vm_map_t maps [kMaxKernelDepth];
7262 vm_map_entry_t entries[kMaxKernelDepth];
7263 vm_map_t map;
7264 vm_map_entry_t entry;
7265 vm_object_offset_t offset;
7266 vm_page_t page;
7267 int stackIdx, count;
7268 uint64_t wired_size;
7269 uint64_t wired_managed_size;
7270 uint64_t wired_reserved_size;
7271 mach_memory_info_t * counts;
7272
7273 bzero(sites, num_sites * sizeof(mach_memory_info_t));
7274
7275 vm_page_iterate_objects(sites, num_sites, &vm_page_count_object);
7276
7277 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
7278 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
7279 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
7280
7281 assert(num_sites >= (VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT));
7282 counts = &sites[VM_KERN_MEMORY_COUNT];
7283
7284#define SET_COUNT(xcount, xsize, xflags) \
7285 counts[xcount].site = (xcount); \
7286 counts[xcount].size = (xsize); \
7287 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
7288
7289 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
7290 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
7291 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
7292 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
7293 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
7294 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
7295
7296#define SET_MAP(xcount, xsize, xfree, xlargest) \
7297 counts[xcount].site = (xcount); \
7298 counts[xcount].size = (xsize); \
7299 counts[xcount].free = (xfree); \
7300 counts[xcount].largest = (xlargest); \
7301 counts[xcount].flags = VM_KERN_SITE_COUNTER;
7302
7303 vm_map_size_t map_size, map_free, map_largest;
7304
7305 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
7306 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
7307
7308 vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
7309 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
7310
7311 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
7312 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
7313
7314 map = kernel_map;
7315 stackIdx = 0;
7316 while (map)
7317 {
7318 vm_map_lock(map);
7319 for (entry = map->hdr.links.next; map; entry = entry->links.next)
7320 {
7321 if (entry->is_sub_map)
7322 {
7323 assert(stackIdx < kMaxKernelDepth);
7324 maps[stackIdx] = map;
7325 entries[stackIdx] = entry;
7326 stackIdx++;
7327 map = VME_SUBMAP(entry);
7328 entry = NULL;
7329 break;
7330 }
7331 if (VME_OBJECT(entry) == kernel_object)
7332 {
7333 count = 0;
7334 vm_object_lock(VME_OBJECT(entry));
7335 for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
7336 {
7337 page = vm_page_lookup(VME_OBJECT(entry), offset);
7338 if (page && VM_PAGE_WIRED(page)) count++;
7339 }
7340 vm_object_unlock(VME_OBJECT(entry));
7341
7342 if (count)
7343 {
7344 assert(VME_ALIAS(entry) < num_sites);
7345 sites[VME_ALIAS(entry)].size += ptoa_64(count);
7346 }
7347 }
7348 if (entry == vm_map_last_entry(map))
7349 {
7350 vm_map_unlock(map);
7351 if (!stackIdx) map = NULL;
7352 else
7353 {
7354 --stackIdx;
7355 map = maps[stackIdx];
7356 entry = entries[stackIdx];
7357 }
7358 }
7359 }
7360 }
7361
7362 process_account(sites, num_sites);
7363
7364 return (KERN_SUCCESS);
7365}