/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_page.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */

#include <debug.h>
#include <libkern/OSAtomic.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
#include <mach/sdt.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/xpr.h>
#include <vm/pmap.h>
#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>		/* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>
#include <vm/cpm.h>
#include <pexpert/pexpert.h>

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_compressor.h>

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kdebug.h>

boolean_t	hibernate_cleaning_in_progress = FALSE;
boolean_t	vm_page_free_verify = TRUE;

uint32_t	vm_lopage_free_count = 0;
uint32_t	vm_lopage_free_limit = 0;
uint32_t	vm_lopage_lowater = 0;
boolean_t	vm_lopage_refill = FALSE;
boolean_t	vm_lopage_needed = FALSE;

lck_mtx_ext_t	vm_page_queue_lock_ext;
lck_mtx_ext_t	vm_page_queue_free_lock_ext;
lck_mtx_ext_t	vm_purgeable_queue_lock_ext;

int		speculative_age_index = 0;
int		speculative_steal_index = 0;
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];


__private_extern__ void	vm_page_init_lck_grp(void);

static void		vm_page_free_prepare(vm_page_t page);
static vm_page_t	vm_page_grab_fictitious_common(ppnum_t phys_addr);




/*
 *	Associated with each page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */

vm_offset_t	virtual_space_start;
vm_offset_t	virtual_space_end;
uint32_t	vm_page_pages;

/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical,
 *	or VP, table.]
 */
typedef struct {
    vm_page_t	pages;
#if MACH_PAGE_HASH_STATS
    int		cur_count;	/* current count */
    int		hi_count;	/* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;


#define BUCKETS_PER_LOCK	16

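/*
 * N.B. (explanatory note added for clarity): the bucket locks are shared.
 * Hash bucket "hash_id" is guarded by
 * vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK], as done in
 * vm_page_insert_internal(), vm_page_remove() and vm_page_lookup() below.
 * For example, a table of 1,048,576 buckets with BUCKETS_PER_LOCK == 16
 * needs only 65,536 spin locks.
 */
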
vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
unsigned int	vm_page_hash_mask;		/* Mask for hash function */
unsigned int	vm_page_hash_shift;		/* Shift for hash function */
uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
unsigned int	vm_page_bucket_lock_count = 0;	/* How big is array of locks? */

lck_spin_t	*vm_page_bucket_locks;

#if VM_PAGE_BUCKETS_CHECK
boolean_t vm_page_buckets_check_ready = FALSE;
#if VM_PAGE_FAKE_BUCKETS
vm_page_bucket_t *vm_page_fake_buckets;	/* decoy buckets */
vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */

#if MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
    int	i;
    int	numbuckets = 0;
    int	highsum = 0;
    int	maxdepth = 0;

    for (i = 0; i < vm_page_bucket_count; i++) {
	if (vm_page_buckets[i].hi_count) {
	    numbuckets++;
	    highsum += vm_page_buckets[i].hi_count;
	    if (vm_page_buckets[i].hi_count > maxdepth)
		maxdepth = vm_page_buckets[i].hi_count;
	}
    }
    printf("Total number of buckets: %d\n", vm_page_bucket_count);
    printf("Number used buckets:     %d = %d%%\n",
	   numbuckets, 100*numbuckets/vm_page_bucket_count);
    printf("Number unused buckets:   %d = %d%%\n",
	   vm_page_bucket_count - numbuckets,
	   100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
    printf("Sum of bucket max depth: %d\n", highsum);
    printf("Average bucket depth:    %d.%02d\n",
	   highsum/vm_page_bucket_count,
	   (highsum%vm_page_bucket_count) * 100 / vm_page_bucket_count);
    printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */

/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	initializations.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;

/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
struct vm_page	vm_page_template;

vm_page_t	vm_pages = VM_PAGE_NULL;
unsigned int	vm_pages_count = 0;
ppnum_t		vm_page_lowest = 0;

/*
 *	Resident pages that represent real memory
 *	are allocated from a set of free lists,
 *	one per color.
 */
unsigned int	vm_colors;
unsigned int	vm_color_mask;			/* mask is == (vm_colors-1) */
unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
queue_head_t	vm_page_queue_free[MAX_COLORS];
unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_wanted_privileged;
unsigned int	vm_page_free_count;
unsigned int	vm_page_fictitious_count;

unsigned int	vm_page_free_count_minimum;	/* debugging */

/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
zone_t	vm_page_zone;
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
lck_mtx_ext_t vm_page_alloc_lock_ext;

unsigned int io_throttle_zero_fill;

unsigned int	vm_page_local_q_count = 0;
unsigned int	vm_page_local_q_soft_limit = 250;
unsigned int	vm_page_local_q_hard_limit = 500;
struct vplq	*vm_page_local_q = NULL;

/* N.B. Guard and fictitious pages must not
 * be assigned a zero phys_page value.
 */
/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *	Guard pages are not accessible so they don't
 *	need a physical address, but we need to enter
 *	one in the pmap.
 *	Let's make it recognizable and make sure that
 *	we don't use a real physical page with that
 *	physical address.
 */
ppnum_t vm_page_guard_addr = (ppnum_t) -2;

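/*
 * Explanatory note (added; assumes the usual 32-bit ppnum_t): the casts
 * above make vm_page_fictitious_addr == 0xFFFFFFFF and
 * vm_page_guard_addr == 0xFFFFFFFE, i.e. the two highest possible
 * physical page numbers, far away from any real DRAM page and easy for
 * the pmap layer to recognize in assertions.
 */
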
/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	file backed and anonymous for convenience as the
 *	pageout daemon often assigns a higher
 *	importance to anonymous pages (less likely to pick)
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
queue_head_t	vm_page_queue_anonymous;	/* inactive memory queue for anonymous pages */
queue_head_t	vm_page_queue_throttled;

unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_anonymous_count;
unsigned int	vm_page_throttled_count;
unsigned int	vm_page_speculative_count;
unsigned int	vm_page_wire_count;
unsigned int	vm_page_wire_count_initial;
unsigned int	vm_page_gobble_count = 0;
unsigned int	vm_page_wire_count_warning = 0;
unsigned int	vm_page_gobble_count_warning = 0;

unsigned int	vm_page_purgeable_count = 0;		/* # of pages purgeable now */
unsigned int	vm_page_purgeable_wired_count = 0;	/* # of purgeable pages that are wired now */
uint64_t	vm_page_purged_count = 0;		/* total count of purged pages */

unsigned int	vm_page_external_count = 0;
unsigned int	vm_page_internal_count = 0;
unsigned int	vm_page_pageable_external_count = 0;
unsigned int	vm_page_pageable_internal_count = 0;

#if DEVELOPMENT || DEBUG
unsigned int	vm_page_speculative_recreated = 0;
unsigned int	vm_page_speculative_created = 0;
unsigned int	vm_page_speculative_used = 0;
#endif

queue_head_t	vm_page_queue_cleaned;

unsigned int	vm_page_cleaned_count = 0;
unsigned int	vm_pageout_enqueued_cleaned = 0;

uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t		max_valid_low_ppnum = 0xffffffff;


/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_throttle_limit = 0;
uint32_t	vm_page_creation_throttle = 0;
unsigned int	vm_page_inactive_target = 0;
unsigned int	vm_page_anonymous_min = 0;
unsigned int	vm_page_inactive_min = 0;
unsigned int	vm_page_free_reserved = 0;
unsigned int	vm_page_throttle_count = 0;


/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;

/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
    page_mask = page_size - 1;

    if ((page_mask & page_size) != 0)
	panic("vm_set_page_size: page size not a power of two");

    for (page_shift = 0; ; page_shift++)
	if ((1U << page_shift) == page_size)
	    break;
}

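/*
 * Explanatory note (added; a worked example of the function above): with
 * the common page_size of 4096 bytes, page_mask becomes 0xFFF and the
 * loop stops at page_shift == 12, since (1U << 12) == 4096.  Any other
 * power of two behaves the same way; a non-power-of-two size panics
 * before the loop is reached.
 */
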

/* Called once during startup, once the cache geometry is known.
 */
static void
vm_page_set_colors( void )
{
    unsigned int	n, override;

    if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )	/* colors specified as a boot-arg? */
	n = override;
    else if ( vm_cache_geometry_colors )		/* do we know what the cache geometry is? */
	n = vm_cache_geometry_colors;
    else	n = DEFAULT_COLORS;			/* use default if all else fails */

    if ( n == 0 )
	n = 1;
    if ( n > MAX_COLORS )
	n = MAX_COLORS;

    /* the count must be a power of 2 */
    if ( ( n & (n - 1)) != 0 )
	panic("vm_page_set_colors");

    vm_colors = n;
    vm_color_mask = n - 1;
}

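/*
 * Explanatory note (added; an illustrative example, not new behavior):
 * with vm_colors == 8, vm_color_mask == 7 and free pages are binned by
 * the low-order bits of their physical page number, e.g.
 * (phys_page & vm_color_mask), so eight physically consecutive pages
 * land on eight different free lists and allocations spread across the
 * cache index bits instead of piling onto the same cache sets.
 */
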

lck_grp_t	vm_page_lck_grp_free;
lck_grp_t	vm_page_lck_grp_queue;
lck_grp_t	vm_page_lck_grp_local;
lck_grp_t	vm_page_lck_grp_purge;
lck_grp_t	vm_page_lck_grp_alloc;
lck_grp_t	vm_page_lck_grp_bucket;
lck_grp_attr_t	vm_page_lck_grp_attr;
lck_attr_t	vm_page_lck_attr;


__private_extern__ void
vm_page_init_lck_grp(void)
{
    /*
     * initialize the vm_page lock world
     */
    lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
    lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
    lck_attr_setdefault(&vm_page_lck_attr);
    lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);

    vm_compressor_init_locks();
}

void
vm_page_init_local_q()
{
    unsigned int	num_cpus;
    unsigned int	i;
    struct vplq		*t_local_q;

    num_cpus = ml_get_max_cpus();

    /*
     * no point in this for a uni-processor system
     */
    if (num_cpus >= 2) {
	t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));

	for (i = 0; i < num_cpus; i++) {
	    struct vpl	*lq;

	    lq = &t_local_q[i].vpl_un.vpl;
	    VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
	    queue_init(&lq->vpl_queue);
	    lq->vpl_count = 0;
	    lq->vpl_internal_count = 0;
	    lq->vpl_external_count = 0;
	}
	vm_page_local_q_count = num_cpus;

	vm_page_local_q = (struct vplq *)t_local_q;
    }
}


/*
 *	vm_page_bootstrap:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 *	Returns the range of available kernel virtual memory.
 */

void
vm_page_bootstrap(
    vm_offset_t		*startp,
    vm_offset_t		*endp)
{
    register vm_page_t	m;
    unsigned int	i;
    unsigned int	log1;
    unsigned int	log2;
    unsigned int	size;

    /*
     *	Initialize the vm_page template.
     */

    m = &vm_page_template;
    bzero(m, sizeof (*m));

    m->pageq.next = NULL;
    m->pageq.prev = NULL;
    m->listq.next = NULL;
    m->listq.prev = NULL;
    m->next = VM_PAGE_NULL;

    m->object = VM_OBJECT_NULL;			/* reset later */
    m->offset = (vm_object_offset_t) -1;	/* reset later */

    m->wire_count = 0;
    m->local = FALSE;
    m->inactive = FALSE;
    m->active = FALSE;
    m->pageout_queue = FALSE;
    m->speculative = FALSE;
    m->laundry = FALSE;
    m->free = FALSE;
    m->reference = FALSE;
    m->gobbled = FALSE;
    m->private = FALSE;
    m->throttled = FALSE;
    m->__unused_pageq_bits = 0;

    m->phys_page = 0;		/* reset later */

    m->busy = TRUE;
    m->wanted = FALSE;
    m->tabled = FALSE;
    m->hashed = FALSE;
    m->fictitious = FALSE;
    m->pmapped = FALSE;
    m->wpmapped = FALSE;
    m->pageout = FALSE;
    m->absent = FALSE;
    m->error = FALSE;
    m->dirty = FALSE;
    m->cleaning = FALSE;
    m->precious = FALSE;
    m->clustered = FALSE;
    m->overwriting = FALSE;
    m->restart = FALSE;
    m->unusual = FALSE;
    m->encrypted = FALSE;
    m->encrypted_cleaning = FALSE;
    m->cs_validated = FALSE;
    m->cs_tainted = FALSE;
    m->no_cache = FALSE;
    m->reusable = FALSE;
    m->slid = FALSE;
    m->was_dirty = FALSE;
    m->xpmapped = FALSE;
    m->compressor = FALSE;
    m->written_by_kernel = FALSE;
    m->__unused_object_bits = 0;

    /*
     *	Initialize the page queues.
     */
    vm_page_init_lck_grp();

    lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
    lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
    lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

    for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
	int group;

	purgeable_queues[i].token_q_head = 0;
	purgeable_queues[i].token_q_tail = 0;
	for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
	    queue_init(&purgeable_queues[i].objq[group]);

	purgeable_queues[i].type = i;
	purgeable_queues[i].new_pages = 0;
#if MACH_ASSERT
	purgeable_queues[i].debug_count_tokens = 0;
	purgeable_queues[i].debug_count_objects = 0;
#endif
    };

    for (i = 0; i < MAX_COLORS; i++ )
	queue_init(&vm_page_queue_free[i]);

    queue_init(&vm_lopage_queue_free);
    queue_init(&vm_page_queue_active);
    queue_init(&vm_page_queue_inactive);
    queue_init(&vm_page_queue_cleaned);
    queue_init(&vm_page_queue_throttled);
    queue_init(&vm_page_queue_anonymous);

    for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
	queue_init(&vm_page_queue_speculative[i].age_q);

	vm_page_queue_speculative[i].age_ts.tv_sec = 0;
	vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
    }
    vm_page_free_wanted = 0;
    vm_page_free_wanted_privileged = 0;

    vm_page_set_colors();


    /*
     *	Steal memory for the map and zone subsystems.
     */
    zone_steal_memory();
    vm_map_steal_memory();

    /*
     *	Allocate (and initialize) the virtual-to-physical
     *	table hash buckets.
     *
     *	The number of buckets should be a power of two to
     *	get a good hash function.  The following computation
     *	chooses the first power of two that is greater
     *	than the number of physical pages in the system.
     */

    if (vm_page_bucket_count == 0) {
	unsigned int npages = pmap_free_pages();

	vm_page_bucket_count = 1;
	while (vm_page_bucket_count < npages)
	    vm_page_bucket_count <<= 1;
    }
    vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
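
    /*
     * Explanatory note (added; a worked example only): if pmap_free_pages()
     * reported, say, 1,000,000 free pages, the loop above would settle on
     * vm_page_bucket_count == 1,048,576 (2^20), the first power of two
     * reaching that count, and the rounded-up division would give
     * vm_page_bucket_lock_count == 65,536 with BUCKETS_PER_LOCK == 16.
     */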

    vm_page_hash_mask = vm_page_bucket_count - 1;

    /*
     *	Calculate object shift value for hashing algorithm:
     *		O = log2(sizeof(struct vm_object))
     *		B = log2(vm_page_bucket_count)
     *		hash shifts the object left by
     *		B/2 - O
     */
    size = vm_page_bucket_count;
    for (log1 = 0; size > 1; log1++)
	size /= 2;
    size = sizeof(struct vm_object);
    for (log2 = 0; size > 1; log2++)
	size /= 2;
    vm_page_hash_shift = log1/2 - log2 + 1;

    vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);	/* Get (ceiling of sqrt of table size) */
    vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);	/* Get (ceiling of quadroot of table size) */
    vm_page_bucket_hash |= 1;				/* Set bit and add 1 - always must be 1 to ensure unique series */

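    /*
     * Explanatory note (added; a worked example under an assumed structure
     * size): with vm_page_bucket_count == 2^20, log1 == 20; if
     * sizeof(struct vm_object) were around 200 bytes, log2 == 7, giving
     * vm_page_hash_shift == 20/2 - 7 + 1 == 4.  The multiplier becomes
     * vm_page_bucket_hash == (1 << 10) | (1 << 5) | 1 == 0x421, an odd
     * constant as the comment above requires.
     */
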
    if (vm_page_hash_mask & vm_page_bucket_count)
	printf("vm_page_bootstrap: WARNING -- strange page hash\n");

#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
    /*
     * Allocate a decoy set of page buckets, to detect
     * any stomping there.
     */
    vm_page_fake_buckets = (vm_page_bucket_t *)
	pmap_steal_memory(vm_page_bucket_count *
			  sizeof(vm_page_bucket_t));
    vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
    vm_page_fake_buckets_end =
	vm_map_round_page((vm_page_fake_buckets_start +
			   (vm_page_bucket_count *
			    sizeof (vm_page_bucket_t))),
			  PAGE_MASK);
    char *cp;
    for (cp = (char *)vm_page_fake_buckets_start;
	 cp < (char *)vm_page_fake_buckets_end;
	 cp++) {
	*cp = 0x5a;
    }
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */

    vm_page_buckets = (vm_page_bucket_t *)
	pmap_steal_memory(vm_page_bucket_count *
			  sizeof(vm_page_bucket_t));

    vm_page_bucket_locks = (lck_spin_t *)
	pmap_steal_memory(vm_page_bucket_lock_count *
			  sizeof(lck_spin_t));

    for (i = 0; i < vm_page_bucket_count; i++) {
	register vm_page_bucket_t *bucket = &vm_page_buckets[i];

	bucket->pages = VM_PAGE_NULL;
#if MACH_PAGE_HASH_STATS
	bucket->cur_count = 0;
	bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
    }

    for (i = 0; i < vm_page_bucket_lock_count; i++)
	lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);

#if VM_PAGE_BUCKETS_CHECK
    vm_page_buckets_check_ready = TRUE;
#endif /* VM_PAGE_BUCKETS_CHECK */

    /*
     *	Machine-dependent code allocates the resident page table.
     *	It uses vm_page_init to initialize the page frames.
     *	The code also returns to us the virtual space available
     *	to the kernel.  We don't trust the pmap module
     *	to get the alignment right.
     */

    pmap_startup(&virtual_space_start, &virtual_space_end);
    virtual_space_start = round_page(virtual_space_start);
    virtual_space_end = trunc_page(virtual_space_end);

    *startp = virtual_space_start;
    *endp = virtual_space_end;

    /*
     *	Compute the initial "wire" count.
     *	Up until now, the pages which have been set aside are not under
     *	the VM system's control, so although they aren't explicitly
     *	wired, they nonetheless can't be moved. At this moment,
     *	all VM managed pages are "free", courtesy of pmap_startup.
     */
    assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
    vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;	/* initial value */
    vm_page_wire_count_initial = vm_page_wire_count;
    vm_page_free_count_minimum = vm_page_free_count;

    printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
	   vm_page_free_count, vm_page_wire_count);

    simple_lock_init(&vm_paging_lock, 0);
}

#ifndef MACHINE_PAGES
/*
 *	We implement pmap_steal_memory and pmap_startup with the help
 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
    vm_size_t size)
{
    vm_offset_t	addr, vaddr;
    ppnum_t	phys_page;

    /*
     *	We round the size to a round multiple.
     */

    size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);

    /*
     *	If this is the first call to pmap_steal_memory,
     *	we have to initialize ourself.
     */

    if (virtual_space_start == virtual_space_end) {
	pmap_virtual_space(&virtual_space_start, &virtual_space_end);

	/*
	 *	The initial values must be aligned properly, and
	 *	we don't trust the pmap module to do it right.
	 */

	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);
    }

    /*
     *	Allocate virtual memory for this request.
     */

    addr = virtual_space_start;
    virtual_space_start += size;

    //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */

    /*
     *	Allocate and map physical pages to back new virtual pages.
     */

    for (vaddr = round_page(addr);
	 vaddr < addr + size;
	 vaddr += PAGE_SIZE) {

	if (!pmap_next_page_hi(&phys_page))
	    panic("pmap_steal_memory");

	/*
	 *	XXX Logically, these mappings should be wired,
	 *	but some pmap modules barf if they are.
	 */
#if defined(__LP64__)
	pmap_pre_expand(kernel_pmap, vaddr);
#endif

	pmap_enter(kernel_pmap, vaddr, phys_page,
		   VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
		   VM_WIMG_USE_DEFAULT, FALSE);
	/*
	 * Account for newly stolen memory
	 */
	vm_page_wire_count++;

    }

    return (void *) addr;
}

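/*
 * Explanatory note (added; a worked example of the rounding above, not
 * new behavior): with 8-byte pointers, a 13-byte request is rounded up to
 * 16 bytes by (size + 7) &~ 7.  Each page-sized slice of the new virtual
 * range is then backed by whatever physical page pmap_next_page_hi()
 * hands out and mapped directly into kernel_pmap, which is why
 * vm_page_wire_count is bumped once per stolen page.
 */
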
void
pmap_startup(
    vm_offset_t *startp,
    vm_offset_t *endp)
{
    unsigned int i, npages, pages_initialized, fill, fillval;
    ppnum_t	phys_page;
    addr64_t	tmpaddr;

    /*
     *	We calculate how many page frames we will have
     *	and then allocate the page structures in one chunk.
     */

    tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
    tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
    npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

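    /*
     * Explanatory note (added; arithmetic illustration under an assumed
     * sizeof(*vm_pages)): each managed page costs PAGE_SIZE bytes plus one
     * vm_page structure, so the divisor is their sum.  If 4 GB were left
     * and a vm_page took roughly 64 bytes, the division would yield about
     * 4 GB / 4160 ~= 1,032,000 page frames, slightly fewer than
     * 4 GB / 4096, leaving room for the structures themselves.
     */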
    vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);

    /*
     *	Initialize the page frames.
     */
    for (i = 0, pages_initialized = 0; i < npages; i++) {
	if (!pmap_next_page(&phys_page))
	    break;
	if (pages_initialized == 0 || phys_page < vm_page_lowest)
	    vm_page_lowest = phys_page;

	vm_page_init(&vm_pages[i], phys_page, FALSE);
	vm_page_pages++;
	pages_initialized++;
    }
    vm_pages_count = pages_initialized;

    /*
     * Check if we want to initialize pages to a known value
     */
    fill = 0;								/* Assume no fill */
    if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;	/* Set fill */
#if DEBUG
    /* This slows down booting the DEBUG kernel, particularly on
     * large memory systems, but is worthwhile in deterministically
     * trapping uninitialized memory usage.
     */
    if (fill == 0) {
	fill = 1;
	fillval = 0xDEB8F177;
    }
#endif
    if (fill)
	kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
    // -debug code remove
    if (2 == vm_himemory_mode) {
	// free low -> high so high is preferred
	for (i = 1; i <= pages_initialized; i++) {
	    if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);	/* Fill the page with a known value if requested at boot */
	    vm_page_release(&vm_pages[i - 1]);
	}
    }
    else
    // debug code remove-

    /*
     * Release pages in reverse order so that physical pages
     * initially get allocated in ascending addresses. This keeps
     * the devices (which must address physical memory) happy if
     * they require several consecutive pages.
     */
    for (i = pages_initialized; i > 0; i--) {
	if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
	vm_page_release(&vm_pages[i - 1]);
    }

#if 0
    {
	vm_page_t xx, xxo, xxl;
	int i, j, k, l;

	j = 0;						/* (BRINGUP) */
	xxl = 0;

	for( i = 0; i < vm_colors; i++ ) {
	    queue_iterate(&vm_page_queue_free[i],
			  xx,
			  vm_page_t,
			  pageq) {			/* BRINGUP */
		j++;					/* (BRINGUP) */
		if(j > vm_page_free_count) {		/* (BRINGUP) */
		    panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
		}

		l = vm_page_free_count - j;		/* (BRINGUP) */
		k = 0;					/* (BRINGUP) */

		if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

		for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
		    k++;
		    if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
		    if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
			panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
		    }
		}

		xxl = xx;
	    }
	}

	if(j != vm_page_free_count) {			/* (BRINGUP) */
	    panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
	}
    }
#endif


    /*
     *	We have to re-align virtual_space_start,
     *	because pmap_steal_memory has been using it.
     */

    virtual_space_start = round_page(virtual_space_start);

    *startp = virtual_space_start;
    *endp = virtual_space_end;
}
#endif	/* MACHINE_PAGES */

/*
 *	Routine:	vm_page_module_init
 *	Purpose:
 *		Second initialization pass, to be done after
 *		the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
    vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
			 0, PAGE_SIZE, "vm pages");

#if ZONE_DEBUG
    zone_debug_disable(vm_page_zone);
#endif	/* ZONE_DEBUG */

    zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
    zone_change(vm_page_zone, Z_EXPAND, FALSE);
    zone_change(vm_page_zone, Z_EXHAUST, TRUE);
    zone_change(vm_page_zone, Z_FOREIGN, TRUE);
    zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
    /*
     * Adjust zone statistics to account for the real pages allocated
     * in vm_page_create(). [Q: is this really what we want?]
     */
    vm_page_zone->count += vm_page_pages;
    vm_page_zone->sum_count += vm_page_pages;
    vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
}

/*
 *	Routine:	vm_page_create
 *	Purpose:
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory.  For example,
 *		memory that it was reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
 */

void
vm_page_create(
    ppnum_t start,
    ppnum_t end)
{
    ppnum_t	phys_page;
    vm_page_t	m;

    for (phys_page = start;
	 phys_page < end;
	 phys_page++) {
	while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
		== VM_PAGE_NULL)
	    vm_page_more_fictitious();

	m->fictitious = FALSE;
	pmap_clear_noencrypt(phys_page);

	vm_page_pages++;
	vm_page_release(m);
    }
}

/*
 *	vm_page_hash:
 *
 *	Distributes the object/offset key pair among hash buckets.
 *
 *	NOTE:	The bucket count must be a power of 2
 */
#define vm_page_hash(object, offset) (\
	( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
	 & vm_page_hash_mask)

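/*
 * Explanatory note (added; an illustrative reading of the macro above):
 * the object pointer is multiplied by the odd constant
 * vm_page_bucket_hash, the page index of the offset (atop_64(offset)) is
 * XORed with the same constant, the two are added, and the sum is masked
 * down to a bucket index with vm_page_hash_mask.  With the illustrative
 * vm_page_bucket_hash == 0x421, offsets 0x1000, 0x2000 and 0x3000 of the
 * same object contribute 1 ^ 0x421, 2 ^ 0x421 and 3 ^ 0x421, so
 * neighboring pages of one object spread across different buckets.
 */
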

/*
 *	vm_page_insert:		[ internal use only ]
 *
 *	Inserts the given mem entry into the object/object-page
 *	table and object list.
 *
 *	The object must be locked.
 */
void
vm_page_insert(
    vm_page_t		mem,
    vm_object_t		object,
    vm_object_offset_t	offset)
{
    vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
}

void
vm_page_insert_internal(
    vm_page_t		mem,
    vm_object_t		object,
    vm_object_offset_t	offset,
    boolean_t		queues_lock_held,
    boolean_t		insert_in_hash,
    boolean_t		batch_pmap_op)
{
    vm_page_bucket_t	*bucket;
    lck_spin_t		*bucket_lock;
    int			hash_id;

    XPR(XPR_VM_PAGE,
	"vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
	object, offset, mem, 0,0);
#if 0
    /*
     * we may not hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(mem);
#endif

    assert(page_aligned(offset));

    if (object == vm_submap_object) {
	/* the vm_submap_object is only a placeholder for submaps */
	panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
    }

    vm_object_lock_assert_exclusive(object);
#if DEBUG
    lck_mtx_assert(&vm_page_queue_lock,
		   queues_lock_held ? LCK_MTX_ASSERT_OWNED
				    : LCK_MTX_ASSERT_NOTOWNED);
#endif	/* DEBUG */

    if (insert_in_hash == TRUE) {
#if DEBUG || VM_PAGE_CHECK_BUCKETS
	if (mem->tabled || mem->object != VM_OBJECT_NULL)
	    panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
		  "already in (obj=%p,off=0x%llx)",
		  mem, object, offset, mem->object, mem->offset);
#endif
	assert(!object->internal || offset < object->vo_size);

	/* only insert "pageout" pages into "pageout" objects,
	 * and normal pages into normal objects */
	assert(object->pageout == mem->pageout);

	assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);

	/*
	 *	Record the object/offset pair in this page
	 */

	mem->object = object;
	mem->offset = offset;

	/*
	 *	Insert it into the object_object/offset hash table
	 */
	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	mem->next = bucket->pages;
	bucket->pages = mem;
#if MACH_PAGE_HASH_STATS
	if (++bucket->cur_count > bucket->hi_count)
	    bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */
	mem->hashed = TRUE;
	lck_spin_unlock(bucket_lock);
    }

    {
	unsigned int	cache_attr;

	cache_attr = object->wimg_bits & VM_WIMG_MASK;

	if (cache_attr != VM_WIMG_USE_DEFAULT) {
	    PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
	}
    }
    /*
     *	Now link into the object's list of backed pages.
     */
    VM_PAGE_INSERT(mem, object);
    mem->tabled = TRUE;

    /*
     *	Show that the object has one more resident page.
     */

    object->resident_page_count++;
    if (VM_PAGE_WIRED(mem)) {
	object->wired_page_count++;
    }
    assert(object->resident_page_count >= object->wired_page_count);

    if (object->internal) {
	OSAddAtomic(1, &vm_page_internal_count);
    } else {
	OSAddAtomic(1, &vm_page_external_count);
    }

    /*
     * It wouldn't make sense to insert a "reusable" page in
     * an object (the page would have been marked "reusable" only
     * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
     * in the object at that time).
     * But a page could be inserted in an "all_reusable" object, if
     * something faults it in (a vm_read() from another task or a
     * "use-after-free" issue in user space, for example).  It can
     * also happen if we're relocating a page from that object to
     * a different physical page during a physically-contiguous
     * allocation.
     */
    assert(!mem->reusable);
    if (mem->object->all_reusable) {
	OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
    }

    if (object->purgable == VM_PURGABLE_VOLATILE) {
	if (VM_PAGE_WIRED(mem)) {
	    OSAddAtomic(1, &vm_page_purgeable_wired_count);
	} else {
	    OSAddAtomic(1, &vm_page_purgeable_count);
	}
    } else if (object->purgable == VM_PURGABLE_EMPTY &&
	       mem->throttled) {
	/*
	 * This page belongs to a purged VM object but hasn't
	 * been purged (because it was "busy").
	 * It's in the "throttled" queue and hence not
	 * visible to vm_pageout_scan().  Move it to a pageable
	 * queue, so that it can eventually be reclaimed, instead
	 * of lingering in the "empty" object.
	 */
	if (queues_lock_held == FALSE)
	    vm_page_lockspin_queues();
	vm_page_deactivate(mem);
	if (queues_lock_held == FALSE)
	    vm_page_unlock_queues();
    }
}

/*
 *	vm_page_replace:
 *
 *	Exactly like vm_page_insert, except that we first
 *	remove any existing page at the given offset in object.
 *
 *	The object must be locked.
 */
void
vm_page_replace(
    register vm_page_t		mem,
    register vm_object_t	object,
    register vm_object_offset_t	offset)
{
    vm_page_bucket_t	*bucket;
    vm_page_t		found_m = VM_PAGE_NULL;
    lck_spin_t		*bucket_lock;
    int			hash_id;

#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(mem);
#endif
    vm_object_lock_assert_exclusive(object);
#if DEBUG || VM_PAGE_CHECK_BUCKETS
    if (mem->tabled || mem->object != VM_OBJECT_NULL)
	panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
	      "already in (obj=%p,off=0x%llx)",
	      mem, object, offset, mem->object, mem->offset);
    lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
#endif
    /*
     *	Record the object/offset pair in this page
     */

    mem->object = object;
    mem->offset = offset;

    /*
     *	Insert it into the object_object/offset hash table,
     *	replacing any page that might have been there.
     */

    hash_id = vm_page_hash(object, offset);
    bucket = &vm_page_buckets[hash_id];
    bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

    lck_spin_lock(bucket_lock);

    if (bucket->pages) {
	vm_page_t *mp = &bucket->pages;
	vm_page_t m = *mp;

	do {
	    if (m->object == object && m->offset == offset) {
		/*
		 * Remove old page from hash list
		 */
		*mp = m->next;
		m->hashed = FALSE;

		found_m = m;
		break;
	    }
	    mp = &m->next;
	} while ((m = *mp));

	mem->next = bucket->pages;
    } else {
	mem->next = VM_PAGE_NULL;
    }
    /*
     * insert new page at head of hash list
     */
    bucket->pages = mem;
    mem->hashed = TRUE;

    lck_spin_unlock(bucket_lock);

    if (found_m) {
	/*
	 * there was already a page at the specified
	 * offset for this object... remove it from
	 * the object and free it back to the free list
	 */
	vm_page_free_unlocked(found_m, FALSE);
    }
    vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
}

/*
 *	vm_page_remove:		[ internal use only ]
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object must be locked.
 */

void
vm_page_remove(
    vm_page_t	mem,
    boolean_t	remove_from_hash)
{
    vm_page_bucket_t	*bucket;
    vm_page_t		this;
    lck_spin_t		*bucket_lock;
    int			hash_id;

    XPR(XPR_VM_PAGE,
	"vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
	mem->object, mem->offset,
	mem, 0,0);

    vm_object_lock_assert_exclusive(mem->object);
    assert(mem->tabled);
    assert(!mem->cleaning);
    assert(!mem->laundry);
#if 0
    /*
     * we don't hold the page queue lock
     * so this check isn't safe to make
     */
    VM_PAGE_CHECK(mem);
#endif
    if (remove_from_hash == TRUE) {
	/*
	 *	Remove from the object_object/offset hash table
	 */
	hash_id = vm_page_hash(mem->object, mem->offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	if ((this = bucket->pages) == mem) {
	    /* optimize for common case */

	    bucket->pages = mem->next;
	} else {
	    vm_page_t	*prev;

	    for (prev = &this->next;
		 (this = *prev) != mem;
		 prev = &this->next)
		continue;
	    *prev = this->next;
	}
#if MACH_PAGE_HASH_STATS
	bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */
	mem->hashed = FALSE;
	lck_spin_unlock(bucket_lock);
    }
    /*
     *	Now remove from the object's list of backed pages.
     */

    VM_PAGE_REMOVE(mem);

    /*
     *	And show that the object has one fewer resident
     *	page.
     */

    assert(mem->object->resident_page_count > 0);
    mem->object->resident_page_count--;

    if (mem->object->internal) {
	assert(vm_page_internal_count);
	OSAddAtomic(-1, &vm_page_internal_count);
    } else {
	assert(vm_page_external_count);
	OSAddAtomic(-1, &vm_page_external_count);
    }
    if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
	if (mem->object->resident_page_count == 0)
	    vm_object_cache_remove(mem->object);
    }

    if (VM_PAGE_WIRED(mem)) {
	assert(mem->object->wired_page_count > 0);
	mem->object->wired_page_count--;
    }
    assert(mem->object->resident_page_count >=
	   mem->object->wired_page_count);
    if (mem->reusable) {
	assert(mem->object->reusable_page_count > 0);
	mem->object->reusable_page_count--;
	assert(mem->object->reusable_page_count <=
	       mem->object->resident_page_count);
	mem->reusable = FALSE;
	OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
	vm_page_stats_reusable.reused_remove++;
    } else if (mem->object->all_reusable) {
	OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
	vm_page_stats_reusable.reused_remove++;
    }

    if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
	if (VM_PAGE_WIRED(mem)) {
	    assert(vm_page_purgeable_wired_count > 0);
	    OSAddAtomic(-1, &vm_page_purgeable_wired_count);
	} else {
	    assert(vm_page_purgeable_count > 0);
	    OSAddAtomic(-1, &vm_page_purgeable_count);
	}
    }
    if (mem->object->set_cache_attr == TRUE)
	pmap_set_cache_attributes(mem->phys_page, 0);

    mem->tabled = FALSE;
    mem->object = VM_OBJECT_NULL;
    mem->offset = (vm_object_offset_t) -1;
}


/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

unsigned long vm_page_lookup_hint = 0;
unsigned long vm_page_lookup_hint_next = 0;
unsigned long vm_page_lookup_hint_prev = 0;
unsigned long vm_page_lookup_hint_miss = 0;
unsigned long vm_page_lookup_bucket_NULL = 0;
unsigned long vm_page_lookup_miss = 0;


vm_page_t
vm_page_lookup(
    vm_object_t		object,
    vm_object_offset_t	offset)
{
    vm_page_t		mem;
    vm_page_bucket_t	*bucket;
    queue_entry_t	qe;
    lck_spin_t		*bucket_lock;
    int			hash_id;

    vm_object_lock_assert_held(object);
    mem = object->memq_hint;

    if (mem != VM_PAGE_NULL) {
	assert(mem->object == object);

	if (mem->offset == offset) {
	    vm_page_lookup_hint++;
	    return mem;
	}
	qe = queue_next(&mem->listq);

	if (! queue_end(&object->memq, qe)) {
	    vm_page_t	next_page;

	    next_page = (vm_page_t) qe;
	    assert(next_page->object == object);

	    if (next_page->offset == offset) {
		vm_page_lookup_hint_next++;
		object->memq_hint = next_page; /* new hint */
		return next_page;
	    }
	}
	qe = queue_prev(&mem->listq);

	if (! queue_end(&object->memq, qe)) {
	    vm_page_t prev_page;

	    prev_page = (vm_page_t) qe;
	    assert(prev_page->object == object);

	    if (prev_page->offset == offset) {
		vm_page_lookup_hint_prev++;
		object->memq_hint = prev_page; /* new hint */
		return prev_page;
	    }
	}
    }
    /*
     * Search the hash table for this object/offset pair
     */
    hash_id = vm_page_hash(object, offset);
    bucket = &vm_page_buckets[hash_id];

    /*
     * since we hold the object lock, we are guaranteed that no
     * new pages can be inserted into this object... this in turn
     * guarantees that the page we're looking for can't exist
     * if the bucket it hashes to is currently NULL even when looked
     * at outside the scope of the hash bucket lock... this is a
     * really cheap optimization to avoid taking the lock
     */
    if (bucket->pages == VM_PAGE_NULL) {
	vm_page_lookup_bucket_NULL++;

	return (VM_PAGE_NULL);
    }
    bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

    lck_spin_lock(bucket_lock);

    for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	if ((mem->object == object) && (mem->offset == offset))
	    break;
    }
    lck_spin_unlock(bucket_lock);

    if (mem != VM_PAGE_NULL) {
	if (object->memq_hint != VM_PAGE_NULL) {
	    vm_page_lookup_hint_miss++;
	}
	assert(mem->object == object);
	object->memq_hint = mem;
    } else
	vm_page_lookup_miss++;

    return(mem);
}

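/*
 * Explanatory note (added): the memq_hint checks above are a fast path
 * for common access patterns.  A repeated lookup hits the hint itself and
 * a sequential scan of an object hits the hint's immediate neighbor on
 * the object's memq, so those cases return without computing the hash or
 * taking a bucket spin lock; the vm_page_lookup_hint* counters record how
 * often each case fires.
 */
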

/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(
    register vm_page_t		mem,
    register vm_object_t	new_object,
    vm_object_offset_t		new_offset,
    boolean_t			encrypted_ok)
{
    boolean_t	internal_to_external, external_to_internal;

    assert(mem->object != new_object);

    /*
     * ENCRYPTED SWAP:
     * The encryption key is based on the page's memory object
     * (aka "pager") and paging offset.  Moving the page to
     * another VM object changes its "pager" and "paging_offset"
     * so it has to be decrypted first, or we would lose the key.
     *
     * One exception is VM object collapsing, where we transfer pages
     * from one backing object to its parent object.  This operation also
     * transfers the paging information, so the <pager,paging_offset> info
     * should remain consistent.  The caller (vm_object_do_collapse())
     * sets "encrypted_ok" in this case.
     */
    if (!encrypted_ok && mem->encrypted) {
	panic("vm_page_rename: page %p is encrypted\n", mem);
    }

    XPR(XPR_VM_PAGE,
	"vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
	new_object, new_offset,
	mem, 0,0);

    /*
     *	Changes to mem->object require the page lock because
     *	the pageout daemon uses that lock to get the object.
     */
    vm_page_lockspin_queues();

    internal_to_external = FALSE;
    external_to_internal = FALSE;

    if (mem->local) {
	/*
	 * it's much easier to get the vm_page_pageable_xxx accounting correct
	 * if we first move the page to the active queue... it's going to end
	 * up there anyway, and we don't do vm_page_rename's frequently enough
	 * for this to matter.
	 */
	VM_PAGE_QUEUES_REMOVE(mem);
	vm_page_activate(mem);
    }
    if (mem->active || mem->inactive || mem->speculative) {
	if (mem->object->internal && !new_object->internal) {
	    internal_to_external = TRUE;
	}
	if (!mem->object->internal && new_object->internal) {
	    external_to_internal = TRUE;
	}
    }

    vm_page_remove(mem, TRUE);
    vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);

    if (internal_to_external) {
	vm_page_pageable_internal_count--;
	vm_page_pageable_external_count++;
    } else if (external_to_internal) {
	vm_page_pageable_external_count--;
	vm_page_pageable_internal_count++;
    }

    vm_page_unlock_queues();
}

/*
 *	vm_page_init:
 *
 *	Initialize the fields in a new page.
 *	This takes a structure with random values and initializes it
 *	so that it can be given to vm_page_release or vm_page_insert.
 */
void
vm_page_init(
    vm_page_t	mem,
    ppnum_t	phys_page,
    boolean_t	lopage)
{
    assert(phys_page);

#if DEBUG
    if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
	if (!(pmap_valid_page(phys_page))) {
	    panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
	}
    }
#endif
    *mem = vm_page_template;
    mem->phys_page = phys_page;
#if 0
    /*
     * we're leaving this turned off for now... currently pages
     * come off the free list and are either immediately dirtied/referenced
     * due to zero-fill or COW faults, or are used to read or write files...
     * in the file I/O case, the UPL mechanism takes care of clearing
     * the state of the HW ref/mod bits in a somewhat fragile way.
     * Since we may change the way this works in the future (to toughen it up),
     * I'm leaving this as a reminder of where these bits could get cleared
     */

    /*
     * make sure both the h/w referenced and modified bits are
     * clear at this point... we are especially dependent on
     * not finding a 'stale' h/w modified in a number of spots
     * once this page goes back into use
     */
    pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
#endif
    mem->lopage = lopage;
}

1662/*
1663 * vm_page_grab_fictitious:
1664 *
1665 * Remove a fictitious page from the free list.
1666 * Returns VM_PAGE_NULL if there are no free pages.
1667 */
1668int c_vm_page_grab_fictitious = 0;
6d2010ae 1669int c_vm_page_grab_fictitious_failed = 0;
1c79356b
A
1670int c_vm_page_release_fictitious = 0;
1671int c_vm_page_more_fictitious = 0;
1672
1673vm_page_t
2d21ac55 1674vm_page_grab_fictitious_common(
b0d623f7 1675 ppnum_t phys_addr)
1c79356b 1676{
6d2010ae
A
1677 vm_page_t m;
1678
1679 if ((m = (vm_page_t)zget(vm_page_zone))) {
1c79356b 1680
0b4c1975 1681 vm_page_init(m, phys_addr, FALSE);
1c79356b 1682 m->fictitious = TRUE;
1c79356b 1683
6d2010ae
A
1684 c_vm_page_grab_fictitious++;
1685 } else
1686 c_vm_page_grab_fictitious_failed++;
1687
1c79356b
A
1688 return m;
1689}
1690
2d21ac55
A
1691vm_page_t
1692vm_page_grab_fictitious(void)
1693{
1694 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1695}
1696
1697vm_page_t
1698vm_page_grab_guard(void)
1699{
1700 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1701}
1702
6d2010ae 1703
1c79356b
A
1704/*
1705 * vm_page_release_fictitious:
1706 *
6d2010ae 1707 * Release a fictitious page to the zone pool
1c79356b 1708 */
1c79356b
A
1709void
1710vm_page_release_fictitious(
6d2010ae 1711 vm_page_t m)
1c79356b
A
1712{
1713 assert(!m->free);
1c79356b 1714 assert(m->fictitious);
2d21ac55
A
1715 assert(m->phys_page == vm_page_fictitious_addr ||
1716 m->phys_page == vm_page_guard_addr);
1c79356b
A
1717
1718 c_vm_page_release_fictitious++;
6d2010ae 1719
91447636 1720 zfree(vm_page_zone, m);
1c79356b
A
1721}
1722
1723/*
1724 * vm_page_more_fictitious:
1725 *
6d2010ae 1726 * Add more fictitious pages to the zone.
1c79356b
A
1727 * Allowed to block. This routine is way intimate
1728 * with the zones code, for several reasons:
1729 * 1. we need to carve some page structures out of physical
1730 * memory before zones work, so they _cannot_ come from
1731 * the zone_map.
1732 * 2. the zone needs to be collectable in order to prevent
1733 * growth without bound. These structures are used by
1734 * the device pager (by the hundreds and thousands), as
1735 * private pages for pageout, and as blocking pages for
1736 * pagein. Temporary bursts in demand should not result in
1737 * permanent allocation of a resource.
1738 * 3. To smooth allocation humps, we allocate single pages
1739 * with kernel_memory_allocate(), and cram them into the
6d2010ae 1740 * zone.
1c79356b
A
1741 */
1742
1743void vm_page_more_fictitious(void)
1744{
6d2010ae
A
1745 vm_offset_t addr;
1746 kern_return_t retval;
1c79356b
A
1747
1748 c_vm_page_more_fictitious++;
1749
1c79356b
A
1750 /*
1751 * Allocate a single page from the zone_map. Do not wait if no physical
1752 * pages are immediately available, and do not zero the space. We need
1753 * our own blocking lock here to prevent having multiple,
1754 * simultaneous requests from piling up on the zone_map lock. Exactly
1755 * one (of our) threads should be potentially waiting on the map lock.
 1756 * If the winner is not vm-privileged, then the page allocation will fail,
1757 * and it will temporarily block here in the vm_page_wait().
1758 */
b0d623f7 1759 lck_mtx_lock(&vm_page_alloc_lock);
1c79356b
A
1760 /*
1761 * If another thread allocated space, just bail out now.
1762 */
1763 if (zone_free_count(vm_page_zone) > 5) {
1764 /*
1765 * The number "5" is a small number that is larger than the
1766 * number of fictitious pages that any single caller will
1767 * attempt to allocate. Otherwise, a thread will attempt to
1768 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1769 * release all of the resources and locks already acquired,
1770 * and then call this routine. This routine finds the pages
1771 * that the caller released, so fails to allocate new space.
1772 * The process repeats infinitely. The largest known number
1773 * of fictitious pages required in this manner is 2. 5 is
1774 * simply a somewhat larger number.
1775 */
b0d623f7 1776 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
1777 return;
1778 }
1779
91447636
A
1780 retval = kernel_memory_allocate(zone_map,
1781 &addr, PAGE_SIZE, VM_PROT_ALL,
1782 KMA_KOBJECT|KMA_NOPAGEWAIT);
1783 if (retval != KERN_SUCCESS) {
1c79356b 1784 /*
6d2010ae 1785 * No page was available. Drop the
1c79356b
A
1786 * lock to give another thread a chance at it, and
1787 * wait for the pageout daemon to make progress.
1788 */
b0d623f7 1789 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
1790 vm_page_wait(THREAD_UNINT);
1791 return;
1792 }
39236c6e
A
1793
1794 /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
1795 OSAddAtomic64(1, &(vm_page_zone->page_count));
1796
7ddcb079 1797 zcram(vm_page_zone, addr, PAGE_SIZE);
6d2010ae 1798
b0d623f7 1799 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
1800}
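
/*
 * Illustrative sketch, not xnu code: the "re-check under the refill lock"
 * pattern used by vm_page_more_fictitious() above.  Many threads may notice
 * an empty pool and call the refill routine; only the first one past the
 * lock should actually add memory, and the rest should see that the pool
 * has already been topped up and bail out.  The pool type and REFILL_SLACK
 * value below are hypothetical.
 */
#include <pthread.h>

#define REFILL_SLACK	5	/* larger than any single caller's need */

struct ex_pool {
	pthread_mutex_t refill_lock;
	int		free_count;
};

static void
ex_pool_refill(struct ex_pool *pool)
{
	pthread_mutex_lock(&pool->refill_lock);

	/* Another thread may have refilled while we waited for the lock. */
	if (pool->free_count > REFILL_SLACK) {
		pthread_mutex_unlock(&pool->refill_lock);
		return;
	}
	/* Add one chunk of elements; in xnu this is a zcram() of one page. */
	pool->free_count += 16;

	pthread_mutex_unlock(&pool->refill_lock);
}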
1801
1c79356b
A
1802
1803/*
1804 * vm_pool_low():
1805 *
1806 * Return true if it is not likely that a non-vm_privileged thread
1807 * can get memory without blocking. Advisory only, since the
1808 * situation may change under us.
1809 */
1810int
1811vm_pool_low(void)
1812{
1813 /* No locking, at worst we will fib. */
b0d623f7 1814 return( vm_page_free_count <= vm_page_free_reserved );
1c79356b
A
1815}
1816
0c530ab8
A
1817
1818
1819/*
1820 * this is an interface to support bring-up of drivers
1821 * on platforms with physical memory > 4G...
1822 */
1823int vm_himemory_mode = 0;
1824
1825
1826/*
1827 * this interface exists to support hardware controllers
1828 * incapable of generating DMAs with more than 32 bits
1829 * of address on platforms with physical memory > 4G...
1830 */
0b4c1975
A
1831unsigned int vm_lopages_allocated_q = 0;
1832unsigned int vm_lopages_allocated_cpm_success = 0;
1833unsigned int vm_lopages_allocated_cpm_failed = 0;
2d21ac55 1834queue_head_t vm_lopage_queue_free;
0c530ab8
A
1835
1836vm_page_t
1837vm_page_grablo(void)
1838{
0b4c1975 1839 vm_page_t mem;
0c530ab8 1840
0b4c1975 1841 if (vm_lopage_needed == FALSE)
0c530ab8
A
1842 return (vm_page_grab());
1843
b0d623f7 1844 lck_mtx_lock_spin(&vm_page_queue_free_lock);
0c530ab8 1845
0b4c1975
A
1846 if ( !queue_empty(&vm_lopage_queue_free)) {
1847 queue_remove_first(&vm_lopage_queue_free,
1848 mem,
1849 vm_page_t,
1850 pageq);
1851 assert(vm_lopage_free_count);
0c530ab8 1852
0b4c1975
A
1853 vm_lopage_free_count--;
1854 vm_lopages_allocated_q++;
1855
1856 if (vm_lopage_free_count < vm_lopage_lowater)
1857 vm_lopage_refill = TRUE;
0c530ab8 1858
0b4c1975 1859 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 1860 } else {
0b4c1975
A
1861 lck_mtx_unlock(&vm_page_queue_free_lock);
1862
1863 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1864
1865 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1866 vm_lopages_allocated_cpm_failed++;
1867 lck_mtx_unlock(&vm_page_queue_free_lock);
1868
1869 return (VM_PAGE_NULL);
1870 }
1871 mem->busy = TRUE;
1872
1873 vm_page_lockspin_queues();
1874
1875 mem->gobbled = FALSE;
1876 vm_page_gobble_count--;
1877 vm_page_wire_count--;
1878
1879 vm_lopages_allocated_cpm_success++;
1880 vm_page_unlock_queues();
0c530ab8 1881 }
0b4c1975
A
1882 assert(mem->busy);
1883 assert(!mem->free);
1884 assert(!mem->pmapped);
1885 assert(!mem->wpmapped);
7ddcb079 1886 assert(!pmap_is_noencrypt(mem->phys_page));
0b4c1975
A
1887
1888 mem->pageq.next = NULL;
1889 mem->pageq.prev = NULL;
0c530ab8
A
1890
1891 return (mem);
1892}
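
/*
 * Illustrative sketch, not xnu code: the low-memory (below-4G) allocation
 * policy of vm_page_grablo() above.  A small dedicated queue of DMA-able
 * pages is consumed first; dropping below a low-water mark arms a refill
 * flag for the free path, and an empty queue falls back to a slower,
 * address-constrained allocator.  All names and limits are hypothetical.
 */
#include <stdbool.h>
#include <stddef.h>

#define EX_LO_LOWATER	16

struct ex_lopage { struct ex_lopage *next; };

static struct ex_lopage	*ex_lopage_queue;	/* dedicated below-4G free list */
static unsigned		 ex_lopage_free_count;
static bool		 ex_lopage_refill;

/* Stand-in for the slow, address-constrained allocator (cpm_allocate in xnu). */
static struct ex_lopage *
ex_alloc_page_below_4g(void)
{
	return NULL;	/* pretend the fallback found nothing */
}

static struct ex_lopage *
ex_grablo(void)
{
	struct ex_lopage *p = ex_lopage_queue;

	if (p != NULL) {
		ex_lopage_queue = p->next;
		p->next = NULL;

		if (--ex_lopage_free_count < EX_LO_LOWATER)
			ex_lopage_refill = true;	/* free path tops us up */
		return p;
	}
	/* Dedicated queue is empty: fall back to the slow, constrained path. */
	return ex_alloc_page_below_4g();
}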
1893
6d2010ae 1894
1c79356b
A
1895/*
1896 * vm_page_grab:
1897 *
2d21ac55
A
1898 * first try to grab a page from the per-cpu free list...
1899 * this must be done while pre-emption is disabled... if
1900 * a page is available, we're done...
1901 * if no page is available, grab the vm_page_queue_free_lock
1902 * and see if current number of free pages would allow us
1903 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1904 * if there are pages available, disable preemption and
1905 * recheck the state of the per-cpu free list... we could
1906 * have been preempted and moved to a different cpu, or
1907 * some other thread could have re-filled it... if still
1908 * empty, figure out how many pages we can steal from the
1909 * global free queue and move to the per-cpu queue...
 1910 * return 1 of these pages when done... only wake up the
1911 * pageout_scan thread if we moved pages from the global
1912 * list... no need for the wakeup if we've satisfied the
1913 * request from the per-cpu queue.
1c79356b
A
1914 */
1915
2d21ac55
A
1916#define COLOR_GROUPS_TO_STEAL 4
1917
1c79356b
A
1918
1919vm_page_t
2d21ac55 1920vm_page_grab( void )
1c79356b 1921{
2d21ac55
A
1922 vm_page_t mem;
1923
1924
1925 disable_preemption();
1926
1927 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1928return_page_from_cpu_list:
1929 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1930 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1931 mem->pageq.next = NULL;
1932
1933 enable_preemption();
1934
1935 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1936 assert(mem->tabled == FALSE);
1937 assert(mem->object == VM_OBJECT_NULL);
1938 assert(!mem->laundry);
1939 assert(!mem->free);
1940 assert(pmap_verify_free(mem->phys_page));
1941 assert(mem->busy);
1942 assert(!mem->encrypted);
1943 assert(!mem->pmapped);
4a3eedf9 1944 assert(!mem->wpmapped);
6d2010ae
A
1945 assert(!mem->active);
1946 assert(!mem->inactive);
1947 assert(!mem->throttled);
1948 assert(!mem->speculative);
7ddcb079 1949 assert(!pmap_is_noencrypt(mem->phys_page));
2d21ac55
A
1950
1951 return mem;
1952 }
1953 enable_preemption();
1954
1c79356b 1955
1c79356b
A
1956 /*
1957 * Optionally produce warnings if the wire or gobble
1958 * counts exceed some threshold.
1959 */
1960 if (vm_page_wire_count_warning > 0
1961 && vm_page_wire_count >= vm_page_wire_count_warning) {
1962 printf("mk: vm_page_grab(): high wired page count of %d\n",
1963 vm_page_wire_count);
1964 assert(vm_page_wire_count < vm_page_wire_count_warning);
1965 }
1966 if (vm_page_gobble_count_warning > 0
1967 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1968 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1969 vm_page_gobble_count);
1970 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1971 }
1972
b0d623f7
A
1973 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1974
1c79356b
A
1975 /*
1976 * Only let privileged threads (involved in pageout)
1977 * dip into the reserved pool.
1978 */
1c79356b 1979 if ((vm_page_free_count < vm_page_free_reserved) &&
91447636 1980 !(current_thread()->options & TH_OPT_VMPRIV)) {
b0d623f7 1981 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 1982 mem = VM_PAGE_NULL;
1c79356b 1983 }
2d21ac55
A
1984 else {
1985 vm_page_t head;
1986 vm_page_t tail;
1987 unsigned int pages_to_steal;
1988 unsigned int color;
1c79356b 1989
2d21ac55 1990 while ( vm_page_free_count == 0 ) {
1c79356b 1991
b0d623f7 1992 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
1993 /*
1994 * must be a privileged thread to be
1995 * in this state since a non-privileged
1996 * thread would have bailed if we were
1997 * under the vm_page_free_reserved mark
1998 */
1999 VM_PAGE_WAIT();
b0d623f7 2000 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
2001 }
2002
2003 disable_preemption();
2004
2005 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
b0d623f7 2006 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2007
2008 /*
2009 * we got preempted and moved to another processor
2010 * or we got preempted and someone else ran and filled the cache
2011 */
2012 goto return_page_from_cpu_list;
2013 }
2014 if (vm_page_free_count <= vm_page_free_reserved)
2015 pages_to_steal = 1;
2016 else {
2017 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
2018
2019 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
2020 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2021 }
2022 color = PROCESSOR_DATA(current_processor(), start_color);
2023 head = tail = NULL;
2024
2025 while (pages_to_steal--) {
2026 if (--vm_page_free_count < vm_page_free_count_minimum)
2027 vm_page_free_count_minimum = vm_page_free_count;
2028
2029 while (queue_empty(&vm_page_queue_free[color]))
2030 color = (color + 1) & vm_color_mask;
2031
2032 queue_remove_first(&vm_page_queue_free[color],
2033 mem,
2034 vm_page_t,
2035 pageq);
2036 mem->pageq.next = NULL;
2037 mem->pageq.prev = NULL;
2038
6d2010ae
A
2039 assert(!mem->active);
2040 assert(!mem->inactive);
2041 assert(!mem->throttled);
2042 assert(!mem->speculative);
2043
2d21ac55
A
2044 color = (color + 1) & vm_color_mask;
2045
2046 if (head == NULL)
2047 head = mem;
2048 else
2049 tail->pageq.next = (queue_t)mem;
2050 tail = mem;
2051
2052 mem->pageq.prev = NULL;
2053 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2054 assert(mem->tabled == FALSE);
2055 assert(mem->object == VM_OBJECT_NULL);
2056 assert(!mem->laundry);
2057 assert(mem->free);
2058 mem->free = FALSE;
2059
2060 assert(pmap_verify_free(mem->phys_page));
2061 assert(mem->busy);
2062 assert(!mem->free);
2063 assert(!mem->encrypted);
2064 assert(!mem->pmapped);
4a3eedf9 2065 assert(!mem->wpmapped);
7ddcb079 2066 assert(!pmap_is_noencrypt(mem->phys_page));
2d21ac55
A
2067 }
2068 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2069 PROCESSOR_DATA(current_processor(), start_color) = color;
2070
2071 /*
2072 * satisfy this request
2073 */
2074 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2075 mem = head;
2076 mem->pageq.next = NULL;
91447636 2077
b0d623f7 2078 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2079
2080 enable_preemption();
2081 }
1c79356b
A
2082 /*
2083 * Decide if we should poke the pageout daemon.
2084 * We do this if the free count is less than the low
2085 * water mark, or if the free count is less than the high
2086 * water mark (but above the low water mark) and the inactive
2087 * count is less than its target.
2088 *
2089 * We don't have the counts locked ... if they change a little,
2090 * it doesn't really matter.
2091 */
1c79356b 2092 if ((vm_page_free_count < vm_page_free_min) ||
316670eb
A
2093 ((vm_page_free_count < vm_page_free_target) &&
2094 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2095 thread_wakeup((event_t) &vm_page_free_wanted);
2d21ac55 2096
6d2010ae
A
2097 VM_CHECK_MEMORYSTATUS;
2098
55e303ae 2099// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1c79356b
A
2100
2101 return mem;
2102}
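
/*
 * Illustrative sketch, not xnu code: the two-level allocation fast path that
 * vm_page_grab() implements above -- try a private per-CPU cache first with
 * preemption disabled, and only fall back to the globally locked free queue
 * (stealing a small batch to refill the cache) when the cache is empty.
 * This userspace analogue substitutes a per-thread cache for the per-CPU
 * one and a pthread mutex for vm_page_queue_free_lock; all ex_* names are
 * hypothetical.
 */
#include <pthread.h>
#include <stddef.h>

struct ex_elem {
	struct ex_elem *next;
};

static __thread struct ex_elem *ex_cpu_cache;	/* per-thread free list */

static pthread_mutex_t	ex_global_lock = PTHREAD_MUTEX_INITIALIZER;
static struct ex_elem  *ex_global_list;		/* global free list */

#define EX_BATCH	8	/* elements to steal per refill */

static struct ex_elem *
ex_grab(void)
{
	struct ex_elem *e;

	/* Fast path: no lock needed for the thread-private cache. */
	if ((e = ex_cpu_cache) != NULL) {
		ex_cpu_cache = e->next;
		e->next = NULL;
		return e;
	}

	/* Slow path: take the global lock and steal a small batch. */
	pthread_mutex_lock(&ex_global_lock);
	for (int i = 0; i < EX_BATCH && ex_global_list != NULL; i++) {
		e = ex_global_list;
		ex_global_list = e->next;
		e->next = ex_cpu_cache;
		ex_cpu_cache = e;
	}
	pthread_mutex_unlock(&ex_global_lock);

	/* Hand out one element from the refilled cache, if any. */
	if ((e = ex_cpu_cache) != NULL) {
		ex_cpu_cache = e->next;
		e->next = NULL;
	}
	return e;	/* NULL means the global pool was empty too */
}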
2103
2104/*
2105 * vm_page_release:
2106 *
2107 * Return a page to the free list.
2108 */
2109
2110void
2111vm_page_release(
2112 register vm_page_t mem)
2113{
2d21ac55 2114 unsigned int color;
b0d623f7
A
2115 int need_wakeup = 0;
2116 int need_priv_wakeup = 0;
55e303ae 2117
6d2010ae 2118
1c79356b 2119 assert(!mem->private && !mem->fictitious);
b0d623f7
A
2120 if (vm_page_free_verify) {
2121 assert(pmap_verify_free(mem->phys_page));
2122 }
55e303ae 2123// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1c79356b 2124
7ddcb079
A
2125 pmap_clear_noencrypt(mem->phys_page);
2126
b0d623f7 2127 lck_mtx_lock_spin(&vm_page_queue_free_lock);
91447636 2128#if DEBUG
1c79356b
A
2129 if (mem->free)
2130 panic("vm_page_release");
91447636 2131#endif
6d2010ae 2132
2d21ac55 2133 assert(mem->busy);
91447636
A
2134 assert(!mem->laundry);
2135 assert(mem->object == VM_OBJECT_NULL);
2136 assert(mem->pageq.next == NULL &&
2137 mem->pageq.prev == NULL);
2d21ac55
A
2138 assert(mem->listq.next == NULL &&
2139 mem->listq.prev == NULL);
2140
6d2010ae 2141 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
0b4c1975
A
2142 vm_lopage_free_count < vm_lopage_free_limit &&
2143 mem->phys_page < max_valid_low_ppnum) {
0c530ab8
A
2144 /*
2145 * this exists to support hardware controllers
2146 * incapable of generating DMAs with more than 32 bits
2147 * of address on platforms with physical memory > 4G...
2148 */
2d21ac55
A
2149 queue_enter_first(&vm_lopage_queue_free,
2150 mem,
2151 vm_page_t,
2152 pageq);
0c530ab8 2153 vm_lopage_free_count++;
0b4c1975
A
2154
2155 if (vm_lopage_free_count >= vm_lopage_free_limit)
2156 vm_lopage_refill = FALSE;
2157
2158 mem->lopage = TRUE;
0c530ab8 2159 } else {
6d2010ae 2160 mem->lopage = FALSE;
0b4c1975
A
2161 mem->free = TRUE;
2162
2d21ac55
A
2163 color = mem->phys_page & vm_color_mask;
2164 queue_enter_first(&vm_page_queue_free[color],
2165 mem,
2166 vm_page_t,
2167 pageq);
0c530ab8
A
2168 vm_page_free_count++;
2169 /*
2170 * Check if we should wake up someone waiting for page.
2171 * But don't bother waking them unless they can allocate.
2172 *
2173 * We wakeup only one thread, to prevent starvation.
2174 * Because the scheduling system handles wait queues FIFO,
 2175 * if we wake up all waiting threads, one greedy thread
 2176 * can starve multiple niceguy threads. When the threads
 2177 * all wake up, the greedy thread runs first, grabs the page,
2178 * and waits for another page. It will be the first to run
2179 * when the next page is freed.
2180 *
2181 * However, there is a slight danger here.
2182 * The thread we wake might not use the free page.
2183 * Then the other threads could wait indefinitely
2184 * while the page goes unused. To forestall this,
2185 * the pageout daemon will keep making free pages
2186 * as long as vm_page_free_wanted is non-zero.
2187 */
1c79356b 2188
b0d623f7
A
2189 assert(vm_page_free_count > 0);
2190 if (vm_page_free_wanted_privileged > 0) {
2d21ac55 2191 vm_page_free_wanted_privileged--;
b0d623f7
A
2192 need_priv_wakeup = 1;
2193 } else if (vm_page_free_wanted > 0 &&
2194 vm_page_free_count > vm_page_free_reserved) {
0c530ab8 2195 vm_page_free_wanted--;
b0d623f7 2196 need_wakeup = 1;
0c530ab8 2197 }
1c79356b 2198 }
b0d623f7
A
2199 lck_mtx_unlock(&vm_page_queue_free_lock);
2200
2201 if (need_priv_wakeup)
2202 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2203 else if (need_wakeup)
2204 thread_wakeup_one((event_t) &vm_page_free_count);
2d21ac55 2205
6d2010ae 2206 VM_CHECK_MEMORYSTATUS;
1c79356b
A
2207}
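
/*
 * Illustrative sketch, not xnu code: the page-coloring scheme used by
 * vm_page_release() and vm_page_grab() above.  Free pages are kept on an
 * array of queues indexed by the low bits of the physical page number, and
 * the allocator round-robins across the queues so that consecutively
 * allocated pages tend to land in different cache "colors".  The queue
 * representation and names below are hypothetical; EX_COLORS stands in for
 * vm_colors and must be a power of two, as vm_color_mask assumes.
 */
#include <stdint.h>
#include <stddef.h>

#define EX_COLORS	8			/* power of two */
#define EX_COLOR_MASK	(EX_COLORS - 1)

struct ex_cpage {
	struct ex_cpage	*next;
	uint32_t	 phys_page;
};

static struct ex_cpage *ex_free_queue[EX_COLORS];

/* Free: push onto the queue selected by the page's own color. */
static void
ex_release(struct ex_cpage *p)
{
	unsigned color = p->phys_page & EX_COLOR_MASK;

	p->next = ex_free_queue[color];
	ex_free_queue[color] = p;
}

/* Allocate: scan colors round-robin starting from a per-caller hint. */
static struct ex_cpage *
ex_grab_colored(unsigned *start_color)
{
	for (unsigned i = 0; i < EX_COLORS; i++) {
		unsigned color = (*start_color + i) & EX_COLOR_MASK;
		struct ex_cpage *p = ex_free_queue[color];

		if (p != NULL) {
			ex_free_queue[color] = p->next;
			*start_color = (color + 1) & EX_COLOR_MASK;
			p->next = NULL;
			return p;
		}
	}
	return NULL;	/* no free pages in any color */
}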
2208
1c79356b
A
2209/*
2210 * vm_page_wait:
2211 *
2212 * Wait for a page to become available.
2213 * If there are plenty of free pages, then we don't sleep.
2214 *
2215 * Returns:
2216 * TRUE: There may be another page, try again
2217 * FALSE: We were interrupted out of our wait, don't try again
2218 */
2219
2220boolean_t
2221vm_page_wait(
2222 int interruptible )
2223{
2224 /*
2225 * We can't use vm_page_free_reserved to make this
2226 * determination. Consider: some thread might
2227 * need to allocate two pages. The first allocation
2228 * succeeds, the second fails. After the first page is freed,
2229 * a call to vm_page_wait must really block.
2230 */
9bccf70c 2231 kern_return_t wait_result;
9bccf70c 2232 int need_wakeup = 0;
2d21ac55 2233 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
1c79356b 2234
b0d623f7 2235 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
2236
2237 if (is_privileged && vm_page_free_count) {
b0d623f7 2238 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2239 return TRUE;
2240 }
1c79356b 2241 if (vm_page_free_count < vm_page_free_target) {
2d21ac55
A
2242
2243 if (is_privileged) {
2244 if (vm_page_free_wanted_privileged++ == 0)
2245 need_wakeup = 1;
2246 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2247 } else {
2248 if (vm_page_free_wanted++ == 0)
2249 need_wakeup = 1;
2250 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2251 }
b0d623f7 2252 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 2253 counter(c_vm_page_wait_block++);
0b4e3aa0
A
2254
2255 if (need_wakeup)
2256 thread_wakeup((event_t)&vm_page_free_wanted);
9bccf70c 2257
39236c6e
A
2258 if (wait_result == THREAD_WAITING) {
2259 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2260 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
9bccf70c 2261 wait_result = thread_block(THREAD_CONTINUE_NULL);
39236c6e
A
2262 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2263 }
9bccf70c 2264
1c79356b
A
2265 return(wait_result == THREAD_AWAKENED);
2266 } else {
b0d623f7 2267 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b
A
2268 return TRUE;
2269 }
2270}
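
/*
 * Illustrative sketch, not xnu code: the two-class waiter scheme that
 * vm_page_wait() and vm_page_release() implement above.  VM-privileged
 * threads (the pageout path) wait on their own event and are woken first;
 * ordinary threads are woken one at a time, and only while the free count
 * is above the reserve, so a single greedy thread cannot starve the rest.
 * This condvar-based analogue uses hypothetical ex_* names throughout.
 */
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t ex_free_lock	= PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  ex_free_cv	= PTHREAD_COND_INITIALIZER;
static pthread_cond_t  ex_free_priv_cv	= PTHREAD_COND_INITIALIZER;

static int ex_free_count;
static int ex_free_reserved = 4;
static int ex_wanted, ex_wanted_priv;

static void
ex_wait_for_page(bool privileged)
{
	pthread_mutex_lock(&ex_free_lock);
	if (privileged) {
		ex_wanted_priv++;
		while (ex_free_count == 0)
			pthread_cond_wait(&ex_free_priv_cv, &ex_free_lock);
		ex_wanted_priv--;
	} else {
		ex_wanted++;
		while (ex_free_count <= ex_free_reserved)
			pthread_cond_wait(&ex_free_cv, &ex_free_lock);
		ex_wanted--;
	}
	pthread_mutex_unlock(&ex_free_lock);
}

static void
ex_release_page(void)
{
	pthread_mutex_lock(&ex_free_lock);
	ex_free_count++;
	if (ex_wanted_priv > 0) {
		pthread_cond_signal(&ex_free_priv_cv);	/* privileged first */
	} else if (ex_wanted > 0 && ex_free_count > ex_free_reserved) {
		pthread_cond_signal(&ex_free_cv);	/* wake exactly one */
	}
	pthread_mutex_unlock(&ex_free_lock);
}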
2271
2272/*
2273 * vm_page_alloc:
2274 *
2275 * Allocate and return a memory cell associated
2276 * with this VM object/offset pair.
2277 *
2278 * Object must be locked.
2279 */
2280
2281vm_page_t
2282vm_page_alloc(
2283 vm_object_t object,
2284 vm_object_offset_t offset)
2285{
2286 register vm_page_t mem;
2287
2d21ac55 2288 vm_object_lock_assert_exclusive(object);
1c79356b
A
2289 mem = vm_page_grab();
2290 if (mem == VM_PAGE_NULL)
2291 return VM_PAGE_NULL;
2292
2293 vm_page_insert(mem, object, offset);
2294
2295 return(mem);
2296}
2297
0c530ab8
A
2298vm_page_t
2299vm_page_alloclo(
2300 vm_object_t object,
2301 vm_object_offset_t offset)
2302{
2303 register vm_page_t mem;
2304
2d21ac55 2305 vm_object_lock_assert_exclusive(object);
0c530ab8
A
2306 mem = vm_page_grablo();
2307 if (mem == VM_PAGE_NULL)
2308 return VM_PAGE_NULL;
2309
2310 vm_page_insert(mem, object, offset);
2311
2312 return(mem);
2313}
2314
2315
2d21ac55
A
2316/*
2317 * vm_page_alloc_guard:
2318 *
b0d623f7 2319 * Allocate a fictitious page which will be used
2d21ac55
A
2320 * as a guard page. The page will be inserted into
2321 * the object and returned to the caller.
2322 */
2323
2324vm_page_t
2325vm_page_alloc_guard(
2326 vm_object_t object,
2327 vm_object_offset_t offset)
2328{
2329 register vm_page_t mem;
2330
2331 vm_object_lock_assert_exclusive(object);
2332 mem = vm_page_grab_guard();
2333 if (mem == VM_PAGE_NULL)
2334 return VM_PAGE_NULL;
2335
2336 vm_page_insert(mem, object, offset);
2337
2338 return(mem);
2339}
2340
2341
1c79356b
A
2342counter(unsigned int c_laundry_pages_freed = 0;)
2343
1c79356b 2344/*
6d2010ae 2345 * vm_page_free_prepare:
1c79356b 2346 *
6d2010ae
A
2347 * Removes page from any queue it may be on
2348 * and disassociates it from its VM object.
1c79356b
A
2349 *
2350 * Object and page queues must be locked prior to entry.
2351 */
b0d623f7 2352static void
2d21ac55 2353vm_page_free_prepare(
6d2010ae 2354 vm_page_t mem)
b0d623f7
A
2355{
2356 vm_page_free_prepare_queues(mem);
2357 vm_page_free_prepare_object(mem, TRUE);
2358}
2359
2360
2361void
2362vm_page_free_prepare_queues(
2363 vm_page_t mem)
1c79356b 2364{
2d21ac55 2365 VM_PAGE_CHECK(mem);
1c79356b
A
2366 assert(!mem->free);
2367 assert(!mem->cleaning);
2d21ac55 2368#if DEBUG
b0d623f7 2369 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2370 if (mem->free)
b0d623f7 2371 panic("vm_page_free: freeing page on free list\n");
91447636 2372#endif
b0d623f7
A
2373 if (mem->object) {
2374 vm_object_lock_assert_exclusive(mem->object);
2375 }
2d21ac55
A
2376 if (mem->laundry) {
2377 /*
2378 * We may have to free a page while it's being laundered
2379 * if we lost its pager (due to a forced unmount, for example).
316670eb
A
2380 * We need to call vm_pageout_steal_laundry() before removing
2381 * the page from its VM object, so that we can remove it
2382 * from its pageout queue and adjust the laundry accounting
2d21ac55 2383 */
316670eb 2384 vm_pageout_steal_laundry(mem, TRUE);
2d21ac55
A
2385 counter(++c_laundry_pages_freed);
2386 }
39236c6e 2387
b0d623f7
A
2388 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2389
2390 if (VM_PAGE_WIRED(mem)) {
2391 if (mem->object) {
2392 assert(mem->object->wired_page_count > 0);
2393 mem->object->wired_page_count--;
2394 assert(mem->object->resident_page_count >=
2395 mem->object->wired_page_count);
6d2010ae
A
2396
2397 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2398 OSAddAtomic(+1, &vm_page_purgeable_count);
2399 assert(vm_page_purgeable_wired_count > 0);
2400 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2401 }
b0d623f7 2402 }
1c79356b
A
2403 if (!mem->private && !mem->fictitious)
2404 vm_page_wire_count--;
2405 mem->wire_count = 0;
2406 assert(!mem->gobbled);
2407 } else if (mem->gobbled) {
2408 if (!mem->private && !mem->fictitious)
2409 vm_page_wire_count--;
2410 vm_page_gobble_count--;
2411 }
b0d623f7
A
2412}
2413
2414
2415void
2416vm_page_free_prepare_object(
2417 vm_page_t mem,
2418 boolean_t remove_from_hash)
2419{
b0d623f7
A
2420 if (mem->tabled)
2421 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
1c79356b 2422
b0d623f7 2423 PAGE_WAKEUP(mem); /* clears wanted */
1c79356b
A
2424
2425 if (mem->private) {
2426 mem->private = FALSE;
2427 mem->fictitious = TRUE;
55e303ae 2428 mem->phys_page = vm_page_fictitious_addr;
1c79356b 2429 }
6d2010ae 2430 if ( !mem->fictitious) {
0b4c1975 2431 vm_page_init(mem, mem->phys_page, mem->lopage);
1c79356b
A
2432 }
2433}
2434
b0d623f7 2435
6d2010ae
A
2436/*
2437 * vm_page_free:
2438 *
2439 * Returns the given page to the free list,
2440 * disassociating it with any VM object.
2441 *
2442 * Object and page queues must be locked prior to entry.
2443 */
2d21ac55
A
2444void
2445vm_page_free(
2446 vm_page_t mem)
2447{
b0d623f7 2448 vm_page_free_prepare(mem);
6d2010ae 2449
b0d623f7
A
2450 if (mem->fictitious) {
2451 vm_page_release_fictitious(mem);
2452 } else {
2453 vm_page_release(mem);
2454 }
2455}
2456
2457
2458void
2459vm_page_free_unlocked(
2460 vm_page_t mem,
2461 boolean_t remove_from_hash)
2462{
2463 vm_page_lockspin_queues();
2464 vm_page_free_prepare_queues(mem);
2465 vm_page_unlock_queues();
2466
2467 vm_page_free_prepare_object(mem, remove_from_hash);
2468
2d21ac55
A
2469 if (mem->fictitious) {
2470 vm_page_release_fictitious(mem);
2471 } else {
2472 vm_page_release(mem);
2473 }
2474}
55e303ae 2475
316670eb 2476
2d21ac55
A
2477/*
2478 * Free a list of pages. The list can be up to several hundred pages,
2479 * as blocked up by vm_pageout_scan().
b0d623f7 2480 * The big win is not having to take the free list lock once
316670eb 2481 * per page.
2d21ac55 2482 */
55e303ae
A
2483void
2484vm_page_free_list(
316670eb 2485 vm_page_t freeq,
b0d623f7 2486 boolean_t prepare_object)
55e303ae 2487{
316670eb 2488 vm_page_t mem;
2d21ac55 2489 vm_page_t nxt;
316670eb
A
2490 vm_page_t local_freeq;
2491 int pg_count;
2d21ac55 2492
316670eb 2493 while (freeq) {
55e303ae 2494
316670eb
A
2495 pg_count = 0;
2496 local_freeq = VM_PAGE_NULL;
2497 mem = freeq;
b0d623f7 2498
316670eb
A
2499 /*
2500 * break up the processing into smaller chunks so
2501 * that we can 'pipeline' the pages onto the
2502 * free list w/o introducing too much
2503 * contention on the global free queue lock
2504 */
2505 while (mem && pg_count < 64) {
2506
2507 assert(!mem->inactive);
2508 assert(!mem->active);
2509 assert(!mem->throttled);
2510 assert(!mem->free);
2511 assert(!mem->speculative);
2512 assert(!VM_PAGE_WIRED(mem));
2513 assert(mem->pageq.prev == NULL);
2514
2515 nxt = (vm_page_t)(mem->pageq.next);
b0d623f7 2516
316670eb
A
2517 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2518 assert(pmap_verify_free(mem->phys_page));
2519 }
2520 if (prepare_object == TRUE)
2521 vm_page_free_prepare_object(mem, TRUE);
b0d623f7 2522
316670eb
A
2523 if (!mem->fictitious) {
2524 assert(mem->busy);
55e303ae 2525
316670eb
A
2526 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2527 vm_lopage_free_count < vm_lopage_free_limit &&
2528 mem->phys_page < max_valid_low_ppnum) {
2529 mem->pageq.next = NULL;
2530 vm_page_release(mem);
2531 } else {
2532 /*
2533 * IMPORTANT: we can't set the page "free" here
2534 * because that would make the page eligible for
2535 * a physically-contiguous allocation (see
2536 * vm_page_find_contiguous()) right away (we don't
2537 * hold the vm_page_queue_free lock). That would
2538 * cause trouble because the page is not actually
2539 * in the free queue yet...
2540 */
2541 mem->pageq.next = (queue_entry_t)local_freeq;
2542 local_freeq = mem;
2543 pg_count++;
935ed37a 2544
316670eb 2545 pmap_clear_noencrypt(mem->phys_page);
935ed37a 2546 }
316670eb
A
2547 } else {
2548 assert(mem->phys_page == vm_page_fictitious_addr ||
2549 mem->phys_page == vm_page_guard_addr);
2550 vm_page_release_fictitious(mem);
2d21ac55 2551 }
316670eb 2552 mem = nxt;
55e303ae 2553 }
316670eb
A
2554 freeq = mem;
2555
2556 if ( (mem = local_freeq) ) {
2557 unsigned int avail_free_count;
2558 unsigned int need_wakeup = 0;
2559 unsigned int need_priv_wakeup = 0;
2d21ac55 2560
316670eb 2561 lck_mtx_lock_spin(&vm_page_queue_free_lock);
55e303ae 2562
316670eb
A
2563 while (mem) {
2564 int color;
2565
2566 nxt = (vm_page_t)(mem->pageq.next);
2d21ac55 2567
b0d623f7
A
2568 assert(!mem->free);
2569 assert(mem->busy);
2570 mem->free = TRUE;
b0d623f7 2571
316670eb
A
2572 color = mem->phys_page & vm_color_mask;
2573 queue_enter_first(&vm_page_queue_free[color],
2574 mem,
2575 vm_page_t,
2576 pageq);
2577 mem = nxt;
2d21ac55 2578 }
316670eb
A
2579 vm_page_free_count += pg_count;
2580 avail_free_count = vm_page_free_count;
2581
2582 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2583
2584 if (avail_free_count < vm_page_free_wanted_privileged) {
2585 need_priv_wakeup = avail_free_count;
2586 vm_page_free_wanted_privileged -= avail_free_count;
2587 avail_free_count = 0;
2588 } else {
2589 need_priv_wakeup = vm_page_free_wanted_privileged;
 2590 avail_free_count -= vm_page_free_wanted_privileged;
 2591 vm_page_free_wanted_privileged = 0;
2592 }
b0d623f7 2593 }
316670eb
A
2594 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2595 unsigned int available_pages;
55e303ae 2596
316670eb 2597 available_pages = avail_free_count - vm_page_free_reserved;
55e303ae 2598
316670eb
A
2599 if (available_pages >= vm_page_free_wanted) {
2600 need_wakeup = vm_page_free_wanted;
2601 vm_page_free_wanted = 0;
2602 } else {
2603 need_wakeup = available_pages;
2604 vm_page_free_wanted -= available_pages;
2605 }
2606 }
2607 lck_mtx_unlock(&vm_page_queue_free_lock);
55e303ae 2608
316670eb
A
2609 if (need_priv_wakeup != 0) {
2610 /*
2611 * There shouldn't be that many VM-privileged threads,
2612 * so let's wake them all up, even if we don't quite
2613 * have enough pages to satisfy them all.
2614 */
2615 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2616 }
2617 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2618 /*
2619 * We don't expect to have any more waiters
2620 * after this, so let's wake them all up at
2621 * once.
2622 */
2623 thread_wakeup((event_t) &vm_page_free_count);
2624 } else for (; need_wakeup != 0; need_wakeup--) {
2625 /*
2626 * Wake up one waiter per page we just released.
2627 */
2628 thread_wakeup_one((event_t) &vm_page_free_count);
55e303ae 2629 }
2d21ac55 2630
316670eb 2631 VM_CHECK_MEMORYSTATUS;
b0d623f7 2632 }
55e303ae
A
2633 }
2634}
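
/*
 * Illustrative sketch, not xnu code: the batched-free pattern used by
 * vm_page_free_list() above.  Instead of taking the global free-list lock
 * once per page, pages are first strung onto a private local list in chunks
 * of at most EX_CHUNK, and the lock is taken once per chunk to splice the
 * whole batch in.  The list layout and ex_* names are hypothetical.
 */
#include <pthread.h>
#include <stddef.h>

#define EX_CHUNK	64

struct ex_fpage {
	struct ex_fpage *next;
};

static pthread_mutex_t	 ex_freelist_lock = PTHREAD_MUTEX_INITIALIZER;
static struct ex_fpage	*ex_free_list;
static unsigned		 ex_free_count_total;

static void
ex_free_page_list(struct ex_fpage *freeq)
{
	while (freeq != NULL) {
		struct ex_fpage *local = NULL;
		unsigned count = 0;

		/* Build a private batch without holding the global lock. */
		while (freeq != NULL && count < EX_CHUNK) {
			struct ex_fpage *p = freeq;

			freeq = p->next;
			p->next = local;
			local = p;
			count++;
		}

		/* One lock round-trip covers the whole batch. */
		pthread_mutex_lock(&ex_freelist_lock);
		while (local != NULL) {
			struct ex_fpage *p = local;

			local = p->next;
			p->next = ex_free_list;
			ex_free_list = p;
		}
		ex_free_count_total += count;
		pthread_mutex_unlock(&ex_freelist_lock);
	}
}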
2635
2636
1c79356b
A
2637/*
2638 * vm_page_wire:
2639 *
2640 * Mark this page as wired down by yet
2641 * another map, removing it from paging queues
2642 * as necessary.
2643 *
2644 * The page's object and the page queues must be locked.
2645 */
2646void
2647vm_page_wire(
2648 register vm_page_t mem)
2649{
2650
91447636 2651// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
1c79356b
A
2652
2653 VM_PAGE_CHECK(mem);
b0d623f7
A
2654 if (mem->object) {
2655 vm_object_lock_assert_exclusive(mem->object);
2656 } else {
2657 /*
2658 * In theory, the page should be in an object before it
2659 * gets wired, since we need to hold the object lock
2660 * to update some fields in the page structure.
2661 * However, some code (i386 pmap, for example) might want
2662 * to wire a page before it gets inserted into an object.
2663 * That's somewhat OK, as long as nobody else can get to
2664 * that page and update it at the same time.
2665 */
2666 }
91447636 2667#if DEBUG
b0d623f7 2668 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2669#endif
b0d623f7 2670 if ( !VM_PAGE_WIRED(mem)) {
316670eb
A
2671
2672 if (mem->pageout_queue) {
2673 mem->pageout = FALSE;
2674 vm_pageout_throttle_up(mem);
2675 }
1c79356b 2676 VM_PAGE_QUEUES_REMOVE(mem);
b0d623f7
A
2677
2678 if (mem->object) {
2679 mem->object->wired_page_count++;
2680 assert(mem->object->resident_page_count >=
2681 mem->object->wired_page_count);
2682 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2683 assert(vm_page_purgeable_count > 0);
2684 OSAddAtomic(-1, &vm_page_purgeable_count);
2685 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2686 }
2687 if (mem->object->all_reusable) {
2688 /*
2689 * Wired pages are not counted as "re-usable"
2690 * in "all_reusable" VM objects, so nothing
2691 * to do here.
2692 */
2693 } else if (mem->reusable) {
2694 /*
2695 * This page is not "re-usable" when it's
2696 * wired, so adjust its state and the
2697 * accounting.
2698 */
2699 vm_object_reuse_pages(mem->object,
2700 mem->offset,
2701 mem->offset+PAGE_SIZE_64,
2702 FALSE);
2703 }
2704 }
2705 assert(!mem->reusable);
2706
1c79356b
A
2707 if (!mem->private && !mem->fictitious && !mem->gobbled)
2708 vm_page_wire_count++;
2709 if (mem->gobbled)
2710 vm_page_gobble_count--;
2711 mem->gobbled = FALSE;
593a1d5f 2712
6d2010ae
A
2713 VM_CHECK_MEMORYSTATUS;
2714
91447636
A
2715 /*
2716 * ENCRYPTED SWAP:
2717 * The page could be encrypted, but
2718 * We don't have to decrypt it here
2719 * because we don't guarantee that the
2720 * data is actually valid at this point.
2721 * The page will get decrypted in
2722 * vm_fault_wire() if needed.
2723 */
1c79356b
A
2724 }
2725 assert(!mem->gobbled);
2726 mem->wire_count++;
b0d623f7 2727 VM_PAGE_CHECK(mem);
1c79356b
A
2728}
2729
2730/*
2731 * vm_page_gobble:
2732 *
2733 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2734 *
2735 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2736 */
2737void
2738vm_page_gobble(
2739 register vm_page_t mem)
2740{
2d21ac55 2741 vm_page_lockspin_queues();
1c79356b
A
2742 VM_PAGE_CHECK(mem);
2743
2744 assert(!mem->gobbled);
b0d623f7 2745 assert( !VM_PAGE_WIRED(mem));
1c79356b 2746
b0d623f7 2747 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
1c79356b
A
2748 if (!mem->private && !mem->fictitious)
2749 vm_page_wire_count++;
2750 }
2751 vm_page_gobble_count++;
2752 mem->gobbled = TRUE;
2753 vm_page_unlock_queues();
2754}
2755
2756/*
2757 * vm_page_unwire:
2758 *
2759 * Release one wiring of this page, potentially
2760 * enabling it to be paged again.
2761 *
2762 * The page's object and the page queues must be locked.
2763 */
2764void
2765vm_page_unwire(
0b4c1975
A
2766 vm_page_t mem,
2767 boolean_t queueit)
1c79356b
A
2768{
2769
91447636 2770// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
1c79356b
A
2771
2772 VM_PAGE_CHECK(mem);
b0d623f7
A
2773 assert(VM_PAGE_WIRED(mem));
2774 assert(mem->object != VM_OBJECT_NULL);
91447636 2775#if DEBUG
b0d623f7
A
2776 vm_object_lock_assert_exclusive(mem->object);
2777 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2778#endif
1c79356b
A
2779 if (--mem->wire_count == 0) {
2780 assert(!mem->private && !mem->fictitious);
2781 vm_page_wire_count--;
b0d623f7
A
2782 assert(mem->object->wired_page_count > 0);
2783 mem->object->wired_page_count--;
2784 assert(mem->object->resident_page_count >=
2785 mem->object->wired_page_count);
2786 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2787 OSAddAtomic(+1, &vm_page_purgeable_count);
2788 assert(vm_page_purgeable_wired_count > 0);
2789 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2790 }
91447636
A
2791 assert(!mem->laundry);
2792 assert(mem->object != kernel_object);
2793 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
0b4c1975
A
2794
2795 if (queueit == TRUE) {
2796 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2797 vm_page_deactivate(mem);
2798 } else {
2799 vm_page_activate(mem);
2800 }
2d21ac55 2801 }
593a1d5f 2802
6d2010ae
A
2803 VM_CHECK_MEMORYSTATUS;
2804
1c79356b 2805 }
b0d623f7 2806 VM_PAGE_CHECK(mem);
1c79356b
A
2807}
2808
2809/*
2810 * vm_page_deactivate:
2811 *
2812 * Returns the given page to the inactive list,
2813 * indicating that no physical maps have access
2814 * to this page. [Used by the physical mapping system.]
2815 *
2816 * The page queues must be locked.
2817 */
2818void
2819vm_page_deactivate(
b0d623f7
A
2820 vm_page_t m)
2821{
2822 vm_page_deactivate_internal(m, TRUE);
2823}
2824
2825
2826void
2827vm_page_deactivate_internal(
2828 vm_page_t m,
2829 boolean_t clear_hw_reference)
1c79356b 2830{
2d21ac55 2831
1c79356b 2832 VM_PAGE_CHECK(m);
91447636 2833 assert(m->object != kernel_object);
2d21ac55 2834 assert(m->phys_page != vm_page_guard_addr);
1c79356b 2835
55e303ae 2836// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
91447636 2837#if DEBUG
b0d623f7 2838 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2839#endif
1c79356b
A
2840 /*
2841 * This page is no longer very interesting. If it was
2842 * interesting (active or inactive/referenced), then we
2843 * clear the reference bit and (re)enter it in the
2844 * inactive queue. Note wired pages should not have
2845 * their reference bit cleared.
2846 */
6d2010ae 2847 assert ( !(m->absent && !m->unusual));
0b4c1975 2848
1c79356b 2849 if (m->gobbled) { /* can this happen? */
b0d623f7 2850 assert( !VM_PAGE_WIRED(m));
2d21ac55 2851
1c79356b
A
2852 if (!m->private && !m->fictitious)
2853 vm_page_wire_count--;
2854 vm_page_gobble_count--;
2855 m->gobbled = FALSE;
2856 }
316670eb
A
2857 /*
2858 * if this page is currently on the pageout queue, we can't do the
2859 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2860 * and we can't remove it manually since we would need the object lock
2861 * (which is not required here) to decrement the activity_in_progress
2862 * reference which is held on the object while the page is in the pageout queue...
2863 * just let the normal laundry processing proceed
2864 */
39236c6e 2865 if (m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
1c79356b 2866 return;
2d21ac55 2867
6d2010ae 2868 if (!m->absent && clear_hw_reference == TRUE)
2d21ac55
A
2869 pmap_clear_reference(m->phys_page);
2870
2871 m->reference = FALSE;
2d21ac55
A
2872 m->no_cache = FALSE;
2873
2874 if (!m->inactive) {
2875 VM_PAGE_QUEUES_REMOVE(m);
0b4e3aa0 2876
6d2010ae 2877 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
d1ecb069
A
2878 m->dirty && m->object->internal &&
2879 (m->object->purgable == VM_PURGABLE_DENY ||
2880 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2881 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2d21ac55
A
2882 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2883 m->throttled = TRUE;
2884 vm_page_throttled_count++;
9bccf70c 2885 } else {
6d2010ae 2886 if (m->object->named && m->object->ref_count == 1) {
2d21ac55 2887 vm_page_speculate(m, FALSE);
b0d623f7 2888#if DEVELOPMENT || DEBUG
2d21ac55 2889 vm_page_speculative_recreated++;
b0d623f7 2890#endif
2d21ac55 2891 } else {
6d2010ae 2892 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2d21ac55 2893 }
9bccf70c 2894 }
1c79356b
A
2895 }
2896}
2897
316670eb
A
2898/*
2899 * vm_page_enqueue_cleaned
2900 *
2901 * Put the page on the cleaned queue, mark it cleaned, etc.
2902 * Being on the cleaned queue (and having m->clean_queue set)
2903 * does ** NOT ** guarantee that the page is clean!
2904 *
2905 * Call with the queues lock held.
2906 */
2907
2908void vm_page_enqueue_cleaned(vm_page_t m)
2909{
2910 assert(m->phys_page != vm_page_guard_addr);
2911#if DEBUG
2912 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2913#endif
2914 assert( !(m->absent && !m->unusual));
2915
2916 if (m->gobbled) {
2917 assert( !VM_PAGE_WIRED(m));
2918 if (!m->private && !m->fictitious)
2919 vm_page_wire_count--;
2920 vm_page_gobble_count--;
2921 m->gobbled = FALSE;
2922 }
2923 /*
2924 * if this page is currently on the pageout queue, we can't do the
2925 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2926 * and we can't remove it manually since we would need the object lock
2927 * (which is not required here) to decrement the activity_in_progress
2928 * reference which is held on the object while the page is in the pageout queue...
2929 * just let the normal laundry processing proceed
2930 */
2931 if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
2932 return;
2933
2934 VM_PAGE_QUEUES_REMOVE(m);
2935
2936 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
2937 m->clean_queue = TRUE;
2938 vm_page_cleaned_count++;
2939
2940 m->inactive = TRUE;
2941 vm_page_inactive_count++;
39236c6e
A
2942 if (m->object->internal) {
2943 vm_page_pageable_internal_count++;
2944 } else {
2945 vm_page_pageable_external_count++;
2946 }
316670eb
A
2947
2948 vm_pageout_enqueued_cleaned++;
2949}
2950
1c79356b
A
2951/*
2952 * vm_page_activate:
2953 *
2954 * Put the specified page on the active list (if appropriate).
2955 *
2956 * The page queues must be locked.
2957 */
2958
39236c6e
A
2959#if CONFIG_JETSAM
2960#if LATENCY_JETSAM
2961extern struct vm_page jetsam_latency_page[NUM_OF_JETSAM_LATENCY_TOKENS];
2962#endif /* LATENCY_JETSAM */
2963#endif /* CONFIG_JETSAM */
2964
1c79356b
A
2965void
2966vm_page_activate(
2967 register vm_page_t m)
2968{
2969 VM_PAGE_CHECK(m);
2d21ac55 2970#ifdef FIXME_4778297
91447636 2971 assert(m->object != kernel_object);
2d21ac55
A
2972#endif
2973 assert(m->phys_page != vm_page_guard_addr);
91447636 2974#if DEBUG
b0d623f7 2975 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2976#endif
6d2010ae 2977 assert( !(m->absent && !m->unusual));
0b4c1975 2978
1c79356b 2979 if (m->gobbled) {
b0d623f7 2980 assert( !VM_PAGE_WIRED(m));
1c79356b
A
2981 if (!m->private && !m->fictitious)
2982 vm_page_wire_count--;
2983 vm_page_gobble_count--;
2984 m->gobbled = FALSE;
2985 }
316670eb
A
2986 /*
2987 * if this page is currently on the pageout queue, we can't do the
2988 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2989 * and we can't remove it manually since we would need the object lock
2990 * (which is not required here) to decrement the activity_in_progress
2991 * reference which is held on the object while the page is in the pageout queue...
2992 * just let the normal laundry processing proceed
2993 */
39236c6e 2994 if (m->pageout_queue || m->private || m->fictitious || m->compressor)
1c79356b
A
2995 return;
2996
2d21ac55
A
2997#if DEBUG
2998 if (m->active)
2999 panic("vm_page_activate: already active");
3000#endif
3001
3002 if (m->speculative) {
3003 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
3004 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
3005 }
316670eb 3006
2d21ac55
A
3007 VM_PAGE_QUEUES_REMOVE(m);
3008
b0d623f7 3009 if ( !VM_PAGE_WIRED(m)) {
316670eb 3010
6d2010ae
A
3011 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3012 m->dirty && m->object->internal &&
d1ecb069
A
3013 (m->object->purgable == VM_PURGABLE_DENY ||
3014 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3015 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2d21ac55
A
3016 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3017 m->throttled = TRUE;
3018 vm_page_throttled_count++;
9bccf70c 3019 } else {
2d21ac55
A
3020 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3021 m->active = TRUE;
6d2010ae 3022 vm_page_active_count++;
39236c6e
A
3023 if (m->object->internal) {
3024 vm_page_pageable_internal_count++;
3025 } else {
3026 vm_page_pageable_external_count++;
3027 }
3028#if LATENCY_JETSAM
3029 if (jlp_init) {
3030 uint64_t now = mach_absolute_time();
3031 uint64_t delta = now - jlp_time;
3032 clock_sec_t jl_secs = 0;
3033 clock_usec_t jl_usecs = 0;
3034 vm_page_t jlp;
3035
3036 absolutetime_to_microtime(delta, &jl_secs, &jl_usecs);
3037
3038 jl_usecs += jl_secs * USEC_PER_SEC;
3039 if (jl_usecs >= JETSAM_LATENCY_TOKEN_AGE) {
3040
3041 jlp = &jetsam_latency_page[jlp_current];
3042 if (jlp->active) {
3043 queue_remove(&vm_page_queue_active, jlp, vm_page_t, pageq);
3044 }
3045 queue_enter(&vm_page_queue_active, jlp, vm_page_t, pageq);
3046
3047 jlp->active = TRUE;
3048
3049 jlp->offset = now;
3050 jlp_time = jlp->offset;
3051
3052 if(++jlp_current == NUM_OF_JETSAM_LATENCY_TOKENS) {
3053 jlp_current = 0;
3054 }
3055
3056 }
3057 }
3058#endif /* LATENCY_JETSAM */
9bccf70c 3059 }
2d21ac55
A
3060 m->reference = TRUE;
3061 m->no_cache = FALSE;
1c79356b 3062 }
b0d623f7 3063 VM_PAGE_CHECK(m);
2d21ac55
A
3064}
3065
3066
3067/*
3068 * vm_page_speculate:
3069 *
3070 * Put the specified page on the speculative list (if appropriate).
3071 *
3072 * The page queues must be locked.
3073 */
3074void
3075vm_page_speculate(
3076 vm_page_t m,
3077 boolean_t new)
3078{
3079 struct vm_speculative_age_q *aq;
3080
3081 VM_PAGE_CHECK(m);
3082 assert(m->object != kernel_object);
2d21ac55 3083 assert(m->phys_page != vm_page_guard_addr);
91447636 3084#if DEBUG
b0d623f7 3085 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 3086#endif
6d2010ae 3087 assert( !(m->absent && !m->unusual));
b0d623f7 3088
316670eb
A
3089 /*
3090 * if this page is currently on the pageout queue, we can't do the
3091 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3092 * and we can't remove it manually since we would need the object lock
3093 * (which is not required here) to decrement the activity_in_progress
3094 * reference which is held on the object while the page is in the pageout queue...
3095 * just let the normal laundry processing proceed
3096 */
39236c6e 3097 if (m->pageout_queue || m->private || m->fictitious || m->compressor)
6d2010ae 3098 return;
0b4c1975 3099
b0d623f7
A
3100 VM_PAGE_QUEUES_REMOVE(m);
3101
3102 if ( !VM_PAGE_WIRED(m)) {
2d21ac55 3103 mach_timespec_t ts;
b0d623f7
A
3104 clock_sec_t sec;
3105 clock_nsec_t nsec;
2d21ac55 3106
b0d623f7
A
3107 clock_get_system_nanotime(&sec, &nsec);
3108 ts.tv_sec = (unsigned int) sec;
3109 ts.tv_nsec = nsec;
2d21ac55
A
3110
3111 if (vm_page_speculative_count == 0) {
3112
3113 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3114 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3115
3116 aq = &vm_page_queue_speculative[speculative_age_index];
3117
3118 /*
3119 * set the timer to begin a new group
3120 */
6d2010ae
A
3121 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3122 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2d21ac55
A
3123
3124 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3125 } else {
3126 aq = &vm_page_queue_speculative[speculative_age_index];
3127
3128 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3129
3130 speculative_age_index++;
3131
3132 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3133 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3134 if (speculative_age_index == speculative_steal_index) {
3135 speculative_steal_index = speculative_age_index + 1;
3136
3137 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3138 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3139 }
3140 aq = &vm_page_queue_speculative[speculative_age_index];
3141
3142 if (!queue_empty(&aq->age_q))
3143 vm_page_speculate_ageit(aq);
3144
6d2010ae
A
3145 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3146 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2d21ac55
A
3147
3148 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3149 }
3150 }
3151 enqueue_tail(&aq->age_q, &m->pageq);
3152 m->speculative = TRUE;
3153 vm_page_speculative_count++;
39236c6e
A
3154 if (m->object->internal) {
3155 vm_page_pageable_internal_count++;
3156 } else {
3157 vm_page_pageable_external_count++;
3158 }
2d21ac55
A
3159
3160 if (new == TRUE) {
6d2010ae
A
3161 vm_object_lock_assert_exclusive(m->object);
3162
2d21ac55 3163 m->object->pages_created++;
b0d623f7 3164#if DEVELOPMENT || DEBUG
2d21ac55 3165 vm_page_speculative_created++;
b0d623f7 3166#endif
2d21ac55
A
3167 }
3168 }
b0d623f7 3169 VM_PAGE_CHECK(m);
2d21ac55
A
3170}
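
/*
 * Illustrative sketch, not xnu code: the time-bucketed aging scheme behind
 * vm_page_speculate() above.  Speculative pages are grouped into a small
 * ring of age bins; each bin owns a deadline, and when "now" passes the
 * current bin's deadline the index advances to a fresh bin (a real
 * implementation would also spill the displaced bin's pages to the aged
 * queue that pageout_scan reclaims from, as vm_page_speculate_ageit() does).
 * The bin count, age in seconds, and ex_* names here are hypothetical.
 */
#include <time.h>

#define EX_AGE_BINS	4
#define EX_BIN_AGE_SECS	5

static unsigned	ex_age_index;			/* bin currently filling */
static time_t	ex_bin_deadline[EX_AGE_BINS];	/* when each bin closes */

/* Return the bin a newly speculated page should go on, advancing past
 * bins whose deadline has expired. */
static unsigned
ex_pick_age_bin(void)
{
	time_t now = time(NULL);

	if (ex_bin_deadline[ex_age_index] == 0) {
		/* first use: open the current bin */
		ex_bin_deadline[ex_age_index] = now + EX_BIN_AGE_SECS;
	} else if (now >= ex_bin_deadline[ex_age_index]) {
		/* current bin is fully aged: move on and re-arm the deadline */
		ex_age_index = (ex_age_index + 1) % EX_AGE_BINS;
		ex_bin_deadline[ex_age_index] = now + EX_BIN_AGE_SECS;
	}
	return ex_age_index;
}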
3171
3172
3173/*
3174 * move pages from the specified aging bin to
3175 * the speculative bin that pageout_scan claims from
3176 *
3177 * The page queues must be locked.
3178 */
3179void
3180vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3181{
3182 struct vm_speculative_age_q *sq;
3183 vm_page_t t;
3184
3185 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3186
3187 if (queue_empty(&sq->age_q)) {
3188 sq->age_q.next = aq->age_q.next;
3189 sq->age_q.prev = aq->age_q.prev;
3190
3191 t = (vm_page_t)sq->age_q.next;
3192 t->pageq.prev = &sq->age_q;
3193
3194 t = (vm_page_t)sq->age_q.prev;
3195 t->pageq.next = &sq->age_q;
3196 } else {
3197 t = (vm_page_t)sq->age_q.prev;
3198 t->pageq.next = aq->age_q.next;
3199
3200 t = (vm_page_t)aq->age_q.next;
3201 t->pageq.prev = sq->age_q.prev;
3202
3203 t = (vm_page_t)aq->age_q.prev;
3204 t->pageq.next = &sq->age_q;
3205
3206 sq->age_q.prev = aq->age_q.prev;
1c79356b 3207 }
2d21ac55
A
3208 queue_init(&aq->age_q);
3209}
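
/*
 * Illustrative sketch, not xnu code: splicing one circular doubly-linked
 * queue onto the tail of another in O(1), which is what the raw pointer
 * surgery in vm_page_speculate_ageit() above accomplishes.  The ex_node
 * type below plays the role of both the queue head (sentinel) and its
 * elements; the names are hypothetical.
 */
#include <stdbool.h>

struct ex_node {
	struct ex_node *next;
	struct ex_node *prev;
};

static void
ex_queue_init(struct ex_node *head)
{
	head->next = head->prev = head;
}

static bool
ex_queue_empty(const struct ex_node *head)
{
	return head->next == head;
}

/* Move every element of 'src' to the tail of 'dst', leaving 'src' empty. */
static void
ex_queue_splice_tail(struct ex_node *dst, struct ex_node *src)
{
	if (ex_queue_empty(src))
		return;

	src->next->prev = dst->prev;	/* first src element follows dst tail */
	dst->prev->next = src->next;
	src->prev->next = dst;		/* last src element now ends dst */
	dst->prev = src->prev;

	ex_queue_init(src);		/* src is empty again */
}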
3210
3211
3212void
3213vm_page_lru(
3214 vm_page_t m)
3215{
3216 VM_PAGE_CHECK(m);
3217 assert(m->object != kernel_object);
3218 assert(m->phys_page != vm_page_guard_addr);
3219
3220#if DEBUG
b0d623f7 3221 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 3222#endif
316670eb
A
3223 /*
3224 * if this page is currently on the pageout queue, we can't do the
3225 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3226 * and we can't remove it manually since we would need the object lock
3227 * (which is not required here) to decrement the activity_in_progress
3228 * reference which is held on the object while the page is in the pageout queue...
3229 * just let the normal laundry processing proceed
3230 */
39236c6e 3231 if (m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
2d21ac55
A
3232 return;
3233
3234 m->no_cache = FALSE;
3235
3236 VM_PAGE_QUEUES_REMOVE(m);
3237
6d2010ae 3238 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
1c79356b
A
3239}
3240
2d21ac55 3241
b0d623f7
A
3242void
3243vm_page_reactivate_all_throttled(void)
3244{
3245 vm_page_t first_throttled, last_throttled;
3246 vm_page_t first_active;
3247 vm_page_t m;
3248 int extra_active_count;
39236c6e 3249 int extra_internal_count, extra_external_count;
b0d623f7 3250
6d2010ae
A
3251 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3252 return;
3253
b0d623f7 3254 extra_active_count = 0;
39236c6e
A
3255 extra_internal_count = 0;
3256 extra_external_count = 0;
b0d623f7
A
3257 vm_page_lock_queues();
3258 if (! queue_empty(&vm_page_queue_throttled)) {
3259 /*
3260 * Switch "throttled" pages to "active".
3261 */
3262 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3263 VM_PAGE_CHECK(m);
3264 assert(m->throttled);
3265 assert(!m->active);
3266 assert(!m->inactive);
3267 assert(!m->speculative);
3268 assert(!VM_PAGE_WIRED(m));
6d2010ae
A
3269
3270 extra_active_count++;
39236c6e
A
3271 if (m->object->internal) {
3272 extra_internal_count++;
3273 } else {
3274 extra_external_count++;
3275 }
6d2010ae 3276
b0d623f7
A
3277 m->throttled = FALSE;
3278 m->active = TRUE;
3279 VM_PAGE_CHECK(m);
3280 }
3281
3282 /*
 3283 * Transfer the entire throttled queue to the regular LRU page queues.
3284 * We insert it at the head of the active queue, so that these pages
3285 * get re-evaluated by the LRU algorithm first, since they've been
3286 * completely out of it until now.
3287 */
3288 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3289 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3290 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3291 if (queue_empty(&vm_page_queue_active)) {
3292 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3293 } else {
3294 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3295 }
3296 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3297 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3298 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3299
3300#if DEBUG
3301 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3302#endif
3303 queue_init(&vm_page_queue_throttled);
3304 /*
3305 * Adjust the global page counts.
3306 */
3307 vm_page_active_count += extra_active_count;
39236c6e
A
3308 vm_page_pageable_internal_count += extra_internal_count;
3309 vm_page_pageable_external_count += extra_external_count;
b0d623f7
A
3310 vm_page_throttled_count = 0;
3311 }
3312 assert(vm_page_throttled_count == 0);
3313 assert(queue_empty(&vm_page_queue_throttled));
3314 vm_page_unlock_queues();
3315}
3316
3317
3318/*
3319 * move pages from the indicated local queue to the global active queue
 3320 * it's ok to fail if we're below the hard limit and force == FALSE
3321 * the nolocks == TRUE case is to allow this function to be run on
3322 * the hibernate path
3323 */
3324
3325void
3326vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3327{
3328 struct vpl *lq;
3329 vm_page_t first_local, last_local;
3330 vm_page_t first_active;
3331 vm_page_t m;
3332 uint32_t count = 0;
3333
3334 if (vm_page_local_q == NULL)
3335 return;
3336
3337 lq = &vm_page_local_q[lid].vpl_un.vpl;
3338
3339 if (nolocks == FALSE) {
3340 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3341 if ( !vm_page_trylockspin_queues())
3342 return;
3343 } else
3344 vm_page_lockspin_queues();
3345
3346 VPL_LOCK(&lq->vpl_lock);
3347 }
3348 if (lq->vpl_count) {
3349 /*
3350 * Switch "local" pages to "active".
3351 */
3352 assert(!queue_empty(&lq->vpl_queue));
3353
3354 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3355 VM_PAGE_CHECK(m);
3356 assert(m->local);
3357 assert(!m->active);
3358 assert(!m->inactive);
3359 assert(!m->speculative);
3360 assert(!VM_PAGE_WIRED(m));
3361 assert(!m->throttled);
3362 assert(!m->fictitious);
3363
3364 if (m->local_id != lid)
3365 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3366
3367 m->local_id = 0;
3368 m->local = FALSE;
3369 m->active = TRUE;
3370 VM_PAGE_CHECK(m);
3371
3372 count++;
3373 }
3374 if (count != lq->vpl_count)
3375 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3376
3377 /*
 3378 * Transfer the entire local queue to the regular LRU page queues.
3379 */
3380 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3381 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3382 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3383
3384 if (queue_empty(&vm_page_queue_active)) {
3385 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3386 } else {
3387 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3388 }
3389 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3390 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3391 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3392
3393 queue_init(&lq->vpl_queue);
3394 /*
3395 * Adjust the global page counts.
3396 */
3397 vm_page_active_count += lq->vpl_count;
39236c6e
A
3398 vm_page_pageable_internal_count += lq->vpl_internal_count;
3399 vm_page_pageable_external_count += lq->vpl_external_count;
b0d623f7 3400 lq->vpl_count = 0;
39236c6e
A
3401 lq->vpl_internal_count = 0;
3402 lq->vpl_external_count = 0;
b0d623f7
A
3403 }
3404 assert(queue_empty(&lq->vpl_queue));
3405
3406 if (nolocks == FALSE) {
3407 VPL_UNLOCK(&lq->vpl_lock);
3408 vm_page_unlock_queues();
3409 }
3410}
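
/*
 * Illustrative sketch, not xnu code: the opportunistic-lock policy used by
 * vm_page_reactivate_local() above.  While the per-CPU local queue is below
 * its hard limit the drain is best-effort -- a trylock that simply gives up
 * on contention -- but once the hard limit is hit (or the caller forces it)
 * the thread must block and drain.  The limit, names, and the work done
 * under the lock are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>

#define EX_HARD_LIMIT	100

static pthread_mutex_t ex_global_queue_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returns true if the local queue was drained into the global one. */
static bool
ex_drain_local(unsigned local_count, bool force)
{
	if (local_count < EX_HARD_LIMIT && !force) {
		/* Best effort: don't fight for the lock. */
		if (pthread_mutex_trylock(&ex_global_queue_lock) != 0)
			return false;
	} else {
		/* Over the limit (or forced): we have to wait our turn. */
		pthread_mutex_lock(&ex_global_queue_lock);
	}

	/* ... splice the local queue onto the global active queue here ... */

	pthread_mutex_unlock(&ex_global_queue_lock);
	return true;
}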
3411
1c79356b
A
3412/*
3413 * vm_page_part_zero_fill:
3414 *
3415 * Zero-fill a part of the page.
3416 */
39236c6e 3417#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
1c79356b
A
3418void
3419vm_page_part_zero_fill(
3420 vm_page_t m,
3421 vm_offset_t m_pa,
3422 vm_size_t len)
3423{
1c79356b 3424
316670eb
A
3425#if 0
3426 /*
3427 * we don't hold the page queue lock
3428 * so this check isn't safe to make
3429 */
1c79356b 3430 VM_PAGE_CHECK(m);
316670eb
A
3431#endif
3432
1c79356b 3433#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
55e303ae 3434 pmap_zero_part_page(m->phys_page, m_pa, len);
1c79356b 3435#else
39236c6e 3436 vm_page_t tmp;
1c79356b
A
3437 while (1) {
3438 tmp = vm_page_grab();
3439 if (tmp == VM_PAGE_NULL) {
3440 vm_page_wait(THREAD_UNINT);
3441 continue;
3442 }
3443 break;
3444 }
3445 vm_page_zero_fill(tmp);
3446 if(m_pa != 0) {
3447 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3448 }
3449 if((m_pa + len) < PAGE_SIZE) {
3450 vm_page_part_copy(m, m_pa + len, tmp,
3451 m_pa + len, PAGE_SIZE - (m_pa + len));
3452 }
3453 vm_page_copy(tmp,m);
b0d623f7 3454 VM_PAGE_FREE(tmp);
1c79356b
A
3455#endif
3456
3457}
3458
3459/*
3460 * vm_page_zero_fill:
3461 *
3462 * Zero-fill the specified page.
3463 */
3464void
3465vm_page_zero_fill(
3466 vm_page_t m)
3467{
3468 XPR(XPR_VM_PAGE,
3469 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7 3470 m->object, m->offset, m, 0,0);
316670eb
A
3471#if 0
3472 /*
3473 * we don't hold the page queue lock
3474 * so this check isn't safe to make
3475 */
1c79356b 3476 VM_PAGE_CHECK(m);
316670eb 3477#endif
1c79356b 3478
55e303ae
A
3479// dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3480 pmap_zero_page(m->phys_page);
1c79356b
A
3481}
3482
3483/*
3484 * vm_page_part_copy:
3485 *
3486 * copy part of one page to another
3487 */
3488
3489void
3490vm_page_part_copy(
3491 vm_page_t src_m,
3492 vm_offset_t src_pa,
3493 vm_page_t dst_m,
3494 vm_offset_t dst_pa,
3495 vm_size_t len)
3496{
316670eb
A
3497#if 0
3498 /*
3499 * we don't hold the page queue lock
3500 * so this check isn't safe to make
3501 */
1c79356b
A
3502 VM_PAGE_CHECK(src_m);
3503 VM_PAGE_CHECK(dst_m);
316670eb 3504#endif
55e303ae
A
3505 pmap_copy_part_page(src_m->phys_page, src_pa,
3506 dst_m->phys_page, dst_pa, len);
1c79356b
A
3507}
3508
3509/*
3510 * vm_page_copy:
3511 *
3512 * Copy one page to another
91447636
A
3513 *
3514 * ENCRYPTED SWAP:
3515 * The source page should not be encrypted. The caller should
3516 * make sure the page is decrypted first, if necessary.
1c79356b
A
3517 */
3518
2d21ac55
A
3519int vm_page_copy_cs_validations = 0;
3520int vm_page_copy_cs_tainted = 0;
3521
1c79356b
A
3522void
3523vm_page_copy(
3524 vm_page_t src_m,
3525 vm_page_t dest_m)
3526{
3527 XPR(XPR_VM_PAGE,
3528 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
b0d623f7
A
3529 src_m->object, src_m->offset,
3530 dest_m->object, dest_m->offset,
1c79356b 3531 0);
316670eb
A
3532#if 0
3533 /*
3534 * we don't hold the page queue lock
3535 * so this check isn't safe to make
3536 */
1c79356b
A
3537 VM_PAGE_CHECK(src_m);
3538 VM_PAGE_CHECK(dest_m);
316670eb
A
3539#endif
3540 vm_object_lock_assert_held(src_m->object);
1c79356b 3541
91447636
A
3542 /*
3543 * ENCRYPTED SWAP:
3544 * The source page should not be encrypted at this point.
3545 * The destination page will therefore not contain encrypted
3546 * data after the copy.
3547 */
3548 if (src_m->encrypted) {
3549 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3550 }
3551 dest_m->encrypted = FALSE;
3552
2d21ac55 3553 if (src_m->object != VM_OBJECT_NULL &&
4a3eedf9 3554 src_m->object->code_signed) {
2d21ac55 3555 /*
4a3eedf9 3556 * We're copying a page from a code-signed object.
2d21ac55
A
3557 * Whoever ends up mapping the copy page might care about
3558 * the original page's integrity, so let's validate the
3559 * source page now.
3560 */
3561 vm_page_copy_cs_validations++;
3562 vm_page_validate_cs(src_m);
3563 }
6d2010ae
A
3564
3565 if (vm_page_is_slideable(src_m)) {
3566 boolean_t was_busy = src_m->busy;
3567 src_m->busy = TRUE;
3568 (void) vm_page_slide(src_m, 0);
3569 assert(src_m->busy);
316670eb 3570 if (!was_busy) {
6d2010ae
A
3571 PAGE_WAKEUP_DONE(src_m);
3572 }
3573 }
3574
2d21ac55 3575 /*
b0d623f7
A
3576 * Propagate the cs_tainted bit to the copy page. Do not propagate
3577 * the cs_validated bit.
2d21ac55 3578 */
2d21ac55
A
3579 dest_m->cs_tainted = src_m->cs_tainted;
3580 if (dest_m->cs_tainted) {
2d21ac55
A
3581 vm_page_copy_cs_tainted++;
3582 }
6d2010ae
A
3583 dest_m->slid = src_m->slid;
3584 dest_m->error = src_m->error; /* sliding src_m might have failed... */
55e303ae 3585 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
1c79356b
A
3586}
3587
2d21ac55 3588#if MACH_ASSERT
b0d623f7
A
3589static void
3590_vm_page_print(
3591 vm_page_t p)
3592{
3593 printf("vm_page %p: \n", p);
3594 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3595 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3596 printf(" next=%p\n", p->next);
3597 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3598 printf(" wire_count=%u\n", p->wire_count);
3599
3600 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3601 (p->local ? "" : "!"),
3602 (p->inactive ? "" : "!"),
3603 (p->active ? "" : "!"),
3604 (p->pageout_queue ? "" : "!"),
3605 (p->speculative ? "" : "!"),
3606 (p->laundry ? "" : "!"));
3607 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3608 (p->free ? "" : "!"),
3609 (p->reference ? "" : "!"),
3610 (p->gobbled ? "" : "!"),
3611 (p->private ? "" : "!"),
3612 (p->throttled ? "" : "!"));
3613 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3614 (p->busy ? "" : "!"),
3615 (p->wanted ? "" : "!"),
3616 (p->tabled ? "" : "!"),
3617 (p->fictitious ? "" : "!"),
3618 (p->pmapped ? "" : "!"),
3619 (p->wpmapped ? "" : "!"));
3620 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3621 (p->pageout ? "" : "!"),
3622 (p->absent ? "" : "!"),
3623 (p->error ? "" : "!"),
3624 (p->dirty ? "" : "!"),
3625 (p->cleaning ? "" : "!"),
3626 (p->precious ? "" : "!"),
3627 (p->clustered ? "" : "!"));
3628 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3629 (p->overwriting ? "" : "!"),
3630 (p->restart ? "" : "!"),
3631 (p->unusual ? "" : "!"),
3632 (p->encrypted ? "" : "!"),
3633 (p->encrypted_cleaning ? "" : "!"));
316670eb 3634 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
b0d623f7
A
3635 (p->cs_validated ? "" : "!"),
3636 (p->cs_tainted ? "" : "!"),
3637 (p->no_cache ? "" : "!"));
b0d623f7
A
3638
3639 printf("phys_page=0x%x\n", p->phys_page);
3640}
3641
1c79356b
A
3642/*
3643 * Check that the list of pages is ordered by
3644 * ascending physical address and has no holes.
3645 */
2d21ac55 3646static int
1c79356b
A
3647vm_page_verify_contiguous(
3648 vm_page_t pages,
3649 unsigned int npages)
3650{
3651 register vm_page_t m;
3652 unsigned int page_count;
91447636 3653 vm_offset_t prev_addr;
1c79356b 3654
55e303ae 3655 prev_addr = pages->phys_page;
1c79356b
A
3656 page_count = 1;
3657 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
55e303ae 3658 if (m->phys_page != prev_addr + 1) {
b0d623f7
A
3659 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3660 m, (long)prev_addr, m->phys_page);
6d2010ae 3661 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
1c79356b
A
3662 panic("vm_page_verify_contiguous: not contiguous!");
3663 }
55e303ae 3664 prev_addr = m->phys_page;
1c79356b
A
3665 ++page_count;
3666 }
3667 if (page_count != npages) {
2d21ac55 3668 printf("pages %p actual count 0x%x but requested 0x%x\n",
1c79356b
A
3669 pages, page_count, npages);
3670 panic("vm_page_verify_contiguous: count error");
3671 }
3672 return 1;
3673}
1c79356b
A
3674
3675
2d21ac55
A
3676/*
3677 * Check the free lists for proper length etc.
3678 */
b0d623f7
A
3679static unsigned int
3680vm_page_verify_free_list(
d1ecb069 3681 queue_head_t *vm_page_queue,
b0d623f7
A
3682 unsigned int color,
3683 vm_page_t look_for_page,
3684 boolean_t expect_page)
3685{
3686 unsigned int npages;
3687 vm_page_t m;
3688 vm_page_t prev_m;
3689 boolean_t found_page;
3690
3691 found_page = FALSE;
3692 npages = 0;
d1ecb069
A
3693 prev_m = (vm_page_t) vm_page_queue;
3694 queue_iterate(vm_page_queue,
b0d623f7
A
3695 m,
3696 vm_page_t,
3697 pageq) {
6d2010ae 3698
b0d623f7
A
3699 if (m == look_for_page) {
3700 found_page = TRUE;
3701 }
3702 if ((vm_page_t) m->pageq.prev != prev_m)
3703 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3704 color, npages, m, m->pageq.prev, prev_m);
b0d623f7
A
3705 if ( ! m->busy )
3706 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3707 color, npages, m);
6d2010ae
A
3708 if (color != (unsigned int) -1) {
3709 if ((m->phys_page & vm_color_mask) != color)
3710 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3711 color, npages, m, m->phys_page & vm_color_mask, color);
3712 if ( ! m->free )
3713 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3714 color, npages, m);
3715 }
b0d623f7
A
3716 ++npages;
3717 prev_m = m;
3718 }
3719 if (look_for_page != VM_PAGE_NULL) {
3720 unsigned int other_color;
3721
3722 if (expect_page && !found_page) {
3723 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3724 color, npages, look_for_page, look_for_page->phys_page);
3725 _vm_page_print(look_for_page);
3726 for (other_color = 0;
3727 other_color < vm_colors;
3728 other_color++) {
3729 if (other_color == color)
3730 continue;
d1ecb069 3731 vm_page_verify_free_list(&vm_page_queue_free[other_color],
6d2010ae 3732 other_color, look_for_page, FALSE);
b0d623f7 3733 }
6d2010ae 3734 if (color == (unsigned int) -1) {
d1ecb069
A
3735 vm_page_verify_free_list(&vm_lopage_queue_free,
3736 (unsigned int) -1, look_for_page, FALSE);
3737 }
b0d623f7
A
3738 panic("vm_page_verify_free_list(color=%u)\n", color);
3739 }
3740 if (!expect_page && found_page) {
3741 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3742 color, npages, look_for_page, look_for_page->phys_page);
3743 }
3744 }
3745 return npages;
3746}
3747
3748static boolean_t vm_page_verify_free_lists_enabled = FALSE;
2d21ac55
A
3749static void
3750vm_page_verify_free_lists( void )
3751{
d1ecb069 3752 unsigned int color, npages, nlopages;
b0d623f7
A
3753
3754 if (! vm_page_verify_free_lists_enabled)
3755 return;
3756
2d21ac55 3757 npages = 0;
b0d623f7
A
3758
3759 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
3760
3761 for( color = 0; color < vm_colors; color++ ) {
d1ecb069 3762 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
6d2010ae 3763 color, VM_PAGE_NULL, FALSE);
2d21ac55 3764 }
d1ecb069
A
3765 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3766 (unsigned int) -1,
3767 VM_PAGE_NULL, FALSE);
3768 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3769 panic("vm_page_verify_free_lists: "
3770 "npages %u free_count %d nlopages %u lo_free_count %u",
3771 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
6d2010ae 3772
b0d623f7 3773 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 3774}
2d21ac55 3775
b0d623f7
A
3776void
3777vm_page_queues_assert(
3778 vm_page_t mem,
3779 int val)
3780{
316670eb
A
3781#if DEBUG
3782 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3783#endif
b0d623f7
A
3784 if (mem->free + mem->active + mem->inactive + mem->speculative +
3785 mem->throttled + mem->pageout_queue > (val)) {
3786 _vm_page_print(mem);
3787 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3788 }
3789 if (VM_PAGE_WIRED(mem)) {
3790 assert(!mem->active);
3791 assert(!mem->inactive);
3792 assert(!mem->speculative);
3793 assert(!mem->throttled);
316670eb 3794 assert(!mem->pageout_queue);
b0d623f7
A
3795 }
3796}
3797#endif /* MACH_ASSERT */
2d21ac55 3798
91447636 3799
1c79356b 3800/*
2d21ac55 3801 * CONTIGUOUS PAGE ALLOCATION
2d21ac55
A
3802 *
3803 * Find a region large enough to contain at least n pages
1c79356b
A
3804 * of contiguous physical memory.
3805 *
2d21ac55
A
3806 * This is done by traversing the vm_page_t array in a linear fashion
 3807 * we assume that the vm_page_t array has the available physical pages in an
3808 * ordered, ascending list... this is currently true of all our implementations
3809 * and must remain so... there can be 'holes' in the array... we also can
3810 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
 3811 * which used to happen via 'vm_page_convert'... that function was no longer
3812 * being called and was removed...
3813 *
3814 * The basic flow consists of stabilizing some of the interesting state of
3815 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 3816 * sweep at the beginning of the array looking for pages that meet our criteria
 3817 * for a 'stealable' page... currently we are pretty conservative... if the page
 3818 * meets these criteria and is physically contiguous to the previous page in the 'run'
3819 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3820 * and start to develop a new run... if at this point we've already considered
3821 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3822 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3823 * to other threads trying to acquire free pages (or move pages from q to q),
3824 * and then continue from the spot we left off... we only make 1 pass through the
 3825 * array. Once we have a 'run' that is long enough, we'll go into the loop
3826 * which steals the pages from the queues they're currently on... pages on the free
3827 * queue can be stolen directly... pages that are on any of the other queues
3828 * must be removed from the object they are tabled on... this requires taking the
3829 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3830 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3831 * dump the pages we've currently stolen back to the free list, and pick up our
3832 * scan from the point where we aborted the 'current' run.
3833 *
3834 *
1c79356b 3835 * Requirements:
2d21ac55 3836 * - neither vm_page_queue nor vm_free_list lock can be held on entry
1c79356b 3837 *
2d21ac55 3838 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
1c79356b 3839 *
e5568f75 3840 * Algorithm:
1c79356b 3841 */
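/*
 * Illustrative sketch only (not part of the kernel source): a stripped-down,
 * self-contained model of the linear sweep described above.  Entries are
 * assumed to be ordered by ascending physical page number (possibly with
 * holes), a "stealable" page is modeled simply as a free one, and the run is
 * reset whenever a candidate fails the criteria or is not physically adjacent
 * to the previous page.  The names page_rec and find_contig_run are invented
 * for this sketch.
 */
struct page_rec {
	unsigned int	phys;		/* physical page number */
	int		free;		/* stand-in for the 'stealable' criteria */
};

/* return the starting index of a run of 'want' contiguous free entries, or -1 */
static int
find_contig_run(struct page_rec *pages, int count, int want)
{
	int		run = 0;	/* length of the run being developed */
	int		start = 0;	/* index where the current run began */
	unsigned int	prev_phys = (unsigned int) -2;
	int		i;

	if (want <= 0)
		return -1;

	for (i = 0; i < count && run < want; i++) {
		if (!pages[i].free) {
			run = 0;			/* not stealable... reset the run */
		} else if (run == 0 || pages[i].phys != prev_phys + 1) {
			run = 1;			/* start a new run at this entry */
			start = i;
		} else {
			run++;				/* physically contiguous... keep developing it */
		}
		prev_phys = pages[i].phys;
	}
	return (run == want) ? start : -1;
}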
2d21ac55
A
3842
3843#define MAX_CONSIDERED_BEFORE_YIELD 1000
3844
3845
3846#define RESET_STATE_OF_RUN() \
3847 MACRO_BEGIN \
3848 prevcontaddr = -2; \
b0d623f7 3849 start_pnum = -1; \
2d21ac55
A
3850 free_considered = 0; \
3851 substitute_needed = 0; \
3852 npages = 0; \
3853 MACRO_END
3854
b0d623f7
A
3855/*
3856 * Can we steal in-use (i.e. not free) pages when searching for
3857 * physically-contiguous pages ?
3858 */
3859#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3860
3861static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3862#if DEBUG
3863int vm_page_find_contig_debug = 0;
3864#endif
2d21ac55 3865
1c79356b
A
3866static vm_page_t
3867vm_page_find_contiguous(
2d21ac55
A
3868 unsigned int contig_pages,
3869 ppnum_t max_pnum,
b0d623f7
A
3870 ppnum_t pnum_mask,
3871 boolean_t wire,
3872 int flags)
1c79356b 3873{
2d21ac55 3874 vm_page_t m = NULL;
e5568f75 3875 ppnum_t prevcontaddr;
b0d623f7
A
3876 ppnum_t start_pnum;
3877 unsigned int npages, considered, scanned;
3878 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3879 unsigned int idx_last_contig_page_found = 0;
2d21ac55
A
3880 int free_considered, free_available;
3881 int substitute_needed;
b0d623f7 3882 boolean_t wrapped;
593a1d5f 3883#if DEBUG
b0d623f7
A
3884 clock_sec_t tv_start_sec, tv_end_sec;
3885 clock_usec_t tv_start_usec, tv_end_usec;
593a1d5f
A
3886#endif
3887#if MACH_ASSERT
2d21ac55
A
3888 int yielded = 0;
3889 int dumped_run = 0;
3890 int stolen_pages = 0;
39236c6e 3891 int compressed_pages = 0;
91447636 3892#endif
1c79356b 3893
2d21ac55 3894 if (contig_pages == 0)
1c79356b
A
3895 return VM_PAGE_NULL;
3896
2d21ac55
A
3897#if MACH_ASSERT
3898 vm_page_verify_free_lists();
593a1d5f
A
3899#endif
3900#if DEBUG
2d21ac55
A
3901 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3902#endif
39236c6e
A
3903 PAGE_REPLACEMENT_ALLOWED(TRUE);
3904
2d21ac55 3905 vm_page_lock_queues();
b0d623f7 3906 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
3907
3908 RESET_STATE_OF_RUN();
1c79356b 3909
b0d623f7 3910 scanned = 0;
2d21ac55
A
3911 considered = 0;
3912 free_available = vm_page_free_count - vm_page_free_reserved;
e5568f75 3913
b0d623f7
A
3914 wrapped = FALSE;
3915
3916 if(flags & KMA_LOMEM)
3917 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3918 else
3919 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3920
3921 orig_last_idx = idx_last_contig_page_found;
3922 last_idx = orig_last_idx;
3923
3924 for (page_idx = last_idx, start_idx = last_idx;
2d21ac55
A
3925 npages < contig_pages && page_idx < vm_pages_count;
3926 page_idx++) {
b0d623f7
A
3927retry:
3928 if (wrapped &&
3929 npages == 0 &&
3930 page_idx >= orig_last_idx) {
3931 /*
3932 * We're back where we started and we haven't
3933 * found any suitable contiguous range. Let's
3934 * give up.
3935 */
3936 break;
3937 }
3938 scanned++;
2d21ac55 3939 m = &vm_pages[page_idx];
e5568f75 3940
b0d623f7
A
3941 assert(!m->fictitious);
3942 assert(!m->private);
3943
2d21ac55
A
3944 if (max_pnum && m->phys_page > max_pnum) {
3945 /* no more low pages... */
3946 break;
e5568f75 3947 }
6d2010ae 3948 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
b0d623f7
A
3949 /*
3950 * not aligned
3951 */
3952 RESET_STATE_OF_RUN();
3953
3954 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
39236c6e
A
3955 m->encrypted_cleaning ||
3956 m->pageout_queue || m->laundry || m->wanted ||
3957 m->cleaning || m->overwriting || m->pageout) {
2d21ac55
A
3958 /*
3959 * page is in a transient state
3960 * or a state we don't want to deal
3961 * with, so don't consider it which
3962 * means starting a new run
3963 */
3964 RESET_STATE_OF_RUN();
1c79356b 3965
39236c6e 3966 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
2d21ac55
A
3967 /*
3968 * page needs to be on one of our queues
39236c6e 3969 * or it needs to belong to the compressor pool
2d21ac55
A
3970 * in order for it to be stable behind the
3971 * locks we hold at this point...
3972 * if not, don't consider it which
3973 * means starting a new run
3974 */
3975 RESET_STATE_OF_RUN();
3976
3977 } else if (!m->free && (!m->tabled || m->busy)) {
3978 /*
3979 * pages on the free list are always 'busy'
3980 * so we couldn't test for 'busy' in the check
3981 * for the transient states... pages that are
3982 * 'free' are never 'tabled', so we also couldn't
3983 * test for 'tabled'. So we check here to make
3984 * sure that a non-free page is not busy and is
3985 * tabled on an object...
3986 * if not, don't consider it which
3987 * means starting a new run
3988 */
3989 RESET_STATE_OF_RUN();
3990
3991 } else {
3992 if (m->phys_page != prevcontaddr + 1) {
b0d623f7
A
3993 if ((m->phys_page & pnum_mask) != 0) {
3994 RESET_STATE_OF_RUN();
3995 goto did_consider;
3996 } else {
3997 npages = 1;
3998 start_idx = page_idx;
3999 start_pnum = m->phys_page;
4000 }
2d21ac55
A
4001 } else {
4002 npages++;
e5568f75 4003 }
2d21ac55 4004 prevcontaddr = m->phys_page;
b0d623f7
A
4005
4006 VM_PAGE_CHECK(m);
2d21ac55
A
4007 if (m->free) {
4008 free_considered++;
b0d623f7
A
4009 } else {
4010 /*
4011 * This page is not free.
4012 * If we can't steal used pages,
4013 * we have to give up this run
4014 * and keep looking.
4015 * Otherwise, we might need to
4016 * move the contents of this page
4017 * into a substitute page.
4018 */
4019#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
39236c6e 4020 if (m->pmapped || m->dirty || m->precious) {
b0d623f7
A
4021 substitute_needed++;
4022 }
4023#else
4024 RESET_STATE_OF_RUN();
4025#endif
2d21ac55 4026 }
b0d623f7 4027
2d21ac55
A
4028 if ((free_considered + substitute_needed) > free_available) {
4029 /*
4030 * if we let this run continue
4031 * we will end up dropping the vm_page_free_count
4032 * below the reserve limit... we need to abort
4033 * this run, but we can at least re-consider this
4034 * page... thus the jump back to 'retry'
4035 */
4036 RESET_STATE_OF_RUN();
4037
4038 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4039 considered++;
4040 goto retry;
e5568f75 4041 }
2d21ac55
A
4042 /*
4043 * free_available == 0
4044 * so can't consider any free pages... if
4045 * we went to retry in this case, we'd
4046 * get stuck looking at the same page
4047 * w/o making any forward progress
4048 * we also want to take this path if we've already
4049 * reached our limit that controls the lock latency
4050 */
e5568f75 4051 }
2d21ac55 4052 }
b0d623f7 4053did_consider:
2d21ac55 4054 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
39236c6e
A
4055
4056 PAGE_REPLACEMENT_ALLOWED(FALSE);
4057
b0d623f7 4058 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 4059 vm_page_unlock_queues();
e5568f75 4060
2d21ac55
A
4061 mutex_pause(0);
4062
39236c6e
A
4063 PAGE_REPLACEMENT_ALLOWED(TRUE);
4064
2d21ac55 4065 vm_page_lock_queues();
b0d623f7 4066 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
4067
4068 RESET_STATE_OF_RUN();
1c79356b 4069 /*
2d21ac55
A
4070 * reset our free page limit since we
4071 * dropped the lock protecting the vm_page_free_queue
1c79356b 4072 */
2d21ac55
A
4073 free_available = vm_page_free_count - vm_page_free_reserved;
4074 considered = 0;
4075#if MACH_ASSERT
4076 yielded++;
4077#endif
4078 goto retry;
4079 }
4080 considered++;
4081 }
4082 m = VM_PAGE_NULL;
4083
b0d623f7
A
4084 if (npages != contig_pages) {
4085 if (!wrapped) {
4086 /*
4087 * We didn't find a contiguous range but we didn't
4088 * start from the very first page.
4089 * Start again from the very first page.
4090 */
4091 RESET_STATE_OF_RUN();
4092 if( flags & KMA_LOMEM)
4093 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4094 else
4095 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4096 last_idx = 0;
4097 page_idx = last_idx;
4098 wrapped = TRUE;
4099 goto retry;
4100 }
4101 lck_mtx_unlock(&vm_page_queue_free_lock);
4102 } else {
2d21ac55
A
4103 vm_page_t m1;
4104 vm_page_t m2;
4105 unsigned int cur_idx;
4106 unsigned int tmp_start_idx;
4107 vm_object_t locked_object = VM_OBJECT_NULL;
4108 boolean_t abort_run = FALSE;
4109
b0d623f7
A
4110 assert(page_idx - start_idx == contig_pages);
4111
2d21ac55
A
4112 tmp_start_idx = start_idx;
4113
4114 /*
4115 * first pass through to pull the free pages
4116 * off of the free queue so that in case we
4117 * need substitute pages, we won't grab any
4118 * of the free pages in the run... we'll clear
4119 * the 'free' bit in the 2nd pass, and even in
4120 * an abort_run case, we'll collect all of the
4121 * free pages in this run and return them to the free list
4122 */
4123 while (start_idx < page_idx) {
4124
4125 m1 = &vm_pages[start_idx++];
4126
b0d623f7
A
4127#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4128 assert(m1->free);
4129#endif
4130
2d21ac55 4131 if (m1->free) {
0b4c1975 4132 unsigned int color;
2d21ac55 4133
0b4c1975 4134 color = m1->phys_page & vm_color_mask;
b0d623f7 4135#if MACH_ASSERT
6d2010ae 4136 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
b0d623f7 4137#endif
0b4c1975
A
4138 queue_remove(&vm_page_queue_free[color],
4139 m1,
4140 vm_page_t,
4141 pageq);
d1ecb069
A
4142 m1->pageq.next = NULL;
4143 m1->pageq.prev = NULL;
0b4c1975 4144#if MACH_ASSERT
6d2010ae 4145 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
0b4c1975 4146#endif
b0d623f7
A
4147 /*
4148 * Clear the "free" bit so that this page
4149 * does not get considered for another
4150 * concurrent physically-contiguous allocation.
4151 */
4152 m1->free = FALSE;
4153 assert(m1->busy);
0b4c1975
A
4154
4155 vm_page_free_count--;
2d21ac55
A
4156 }
4157 }
4158 /*
4159 * adjust global freelist counts
4160 */
4161 if (vm_page_free_count < vm_page_free_count_minimum)
4162 vm_page_free_count_minimum = vm_page_free_count;
4163
b0d623f7
A
4164 if( flags & KMA_LOMEM)
4165 vm_page_lomem_find_contiguous_last_idx = page_idx;
4166 else
4167 vm_page_find_contiguous_last_idx = page_idx;
4168
2d21ac55
A
4169 /*
4170 * we can drop the free queue lock at this point since
4171 * we've pulled any 'free' candidates off of the list
4172 * we need it dropped so that we can do a vm_page_grab
 4173 * when substituting for pmapped/dirty pages
4174 */
b0d623f7 4175 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
4176
4177 start_idx = tmp_start_idx;
4178 cur_idx = page_idx - 1;
4179
4180 while (start_idx++ < page_idx) {
4181 /*
4182 * must go through the list from back to front
4183 * so that the page list is created in the
4184 * correct order - low -> high phys addresses
4185 */
4186 m1 = &vm_pages[cur_idx--];
4187
b0d623f7 4188 assert(!m1->free);
39236c6e 4189
b0d623f7 4190 if (m1->object == VM_OBJECT_NULL) {
2d21ac55 4191 /*
b0d623f7 4192 * page has already been removed from
2d21ac55
A
4193 * the free list in the 1st pass
4194 */
b0d623f7 4195 assert(m1->offset == (vm_object_offset_t) -1);
2d21ac55
A
4196 assert(m1->busy);
4197 assert(!m1->wanted);
4198 assert(!m1->laundry);
e5568f75 4199 } else {
2d21ac55 4200 vm_object_t object;
39236c6e
A
4201 int refmod;
4202 boolean_t disconnected, reusable;
2d21ac55
A
4203
4204 if (abort_run == TRUE)
4205 continue;
4206
4207 object = m1->object;
4208
4209 if (object != locked_object) {
4210 if (locked_object) {
4211 vm_object_unlock(locked_object);
4212 locked_object = VM_OBJECT_NULL;
4213 }
4214 if (vm_object_lock_try(object))
4215 locked_object = object;
4216 }
4217 if (locked_object == VM_OBJECT_NULL ||
b0d623f7 4218 (VM_PAGE_WIRED(m1) || m1->gobbled ||
39236c6e
A
4219 m1->encrypted_cleaning ||
4220 m1->pageout_queue || m1->laundry || m1->wanted ||
4221 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
2d21ac55
A
4222
4223 if (locked_object) {
4224 vm_object_unlock(locked_object);
4225 locked_object = VM_OBJECT_NULL;
4226 }
4227 tmp_start_idx = cur_idx;
4228 abort_run = TRUE;
4229 continue;
4230 }
39236c6e
A
4231
4232 disconnected = FALSE;
4233 reusable = FALSE;
4234
4235 if ((m1->reusable ||
4236 m1->object->all_reusable) &&
4237 m1->inactive &&
4238 !m1->dirty &&
4239 !m1->reference) {
4240 /* reusable page... */
4241 refmod = pmap_disconnect(m1->phys_page);
4242 disconnected = TRUE;
4243 if (refmod == 0) {
4244 /*
4245 * ... not reused: can steal
4246 * without relocating contents.
4247 */
4248 reusable = TRUE;
4249 }
4250 }
4251
4252 if ((m1->pmapped &&
4253 ! reusable) ||
4254 m1->dirty ||
4255 m1->precious) {
2d21ac55
A
4256 vm_object_offset_t offset;
4257
4258 m2 = vm_page_grab();
4259
4260 if (m2 == VM_PAGE_NULL) {
4261 if (locked_object) {
4262 vm_object_unlock(locked_object);
4263 locked_object = VM_OBJECT_NULL;
4264 }
4265 tmp_start_idx = cur_idx;
4266 abort_run = TRUE;
4267 continue;
4268 }
39236c6e
A
4269 if (! disconnected) {
4270 if (m1->pmapped)
4271 refmod = pmap_disconnect(m1->phys_page);
4272 else
4273 refmod = 0;
4274 }
4275
4276 /* copy the page's contents */
4277 pmap_copy_page(m1->phys_page, m2->phys_page);
4278 /* copy the page's state */
4279 assert(!VM_PAGE_WIRED(m1));
4280 assert(!m1->free);
4281 assert(!m1->pageout_queue);
4282 assert(!m1->laundry);
4283 m2->reference = m1->reference;
4284 assert(!m1->gobbled);
4285 assert(!m1->private);
4286 m2->no_cache = m1->no_cache;
4287 m2->xpmapped = m1->xpmapped;
4288 assert(!m1->busy);
4289 assert(!m1->wanted);
4290 assert(!m1->fictitious);
4291 m2->pmapped = m1->pmapped; /* should flush cache ? */
4292 m2->wpmapped = m1->wpmapped;
4293 assert(!m1->pageout);
4294 m2->absent = m1->absent;
4295 m2->error = m1->error;
4296 m2->dirty = m1->dirty;
4297 assert(!m1->cleaning);
4298 m2->precious = m1->precious;
4299 m2->clustered = m1->clustered;
4300 assert(!m1->overwriting);
4301 m2->restart = m1->restart;
4302 m2->unusual = m1->unusual;
4303 m2->encrypted = m1->encrypted;
4304 assert(!m1->encrypted_cleaning);
4305 m2->cs_validated = m1->cs_validated;
4306 m2->cs_tainted = m1->cs_tainted;
4307
4308 /*
4309 * If m1 had really been reusable,
4310 * we would have just stolen it, so
 4311 * let's not propagate its "reusable"
4312 * bit and assert that m2 is not
4313 * marked as "reusable".
4314 */
4315 // m2->reusable = m1->reusable;
4316 assert(!m2->reusable);
4317
4318 assert(!m1->lopage);
4319 m2->slid = m1->slid;
4320 m2->was_dirty = m1->was_dirty;
4321 m2->compressor = m1->compressor;
4322
15129b1c
A
4323 /*
4324 * page may need to be flushed if
4325 * it is marshalled into a UPL
4326 * that is going to be used by a device
4327 * that doesn't support coherency
4328 */
4329 m2->written_by_kernel = TRUE;
4330
39236c6e
A
4331 /*
4332 * make sure we clear the ref/mod state
4333 * from the pmap layer... else we risk
4334 * inheriting state from the last time
4335 * this page was used...
4336 */
4337 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
2d21ac55
A
4338
4339 if (refmod & VM_MEM_REFERENCED)
4340 m2->reference = TRUE;
316670eb
A
4341 if (refmod & VM_MEM_MODIFIED) {
4342 SET_PAGE_DIRTY(m2, TRUE);
4343 }
2d21ac55
A
4344 offset = m1->offset;
4345
4346 /*
4347 * completely cleans up the state
4348 * of the page so that it is ready
4349 * to be put onto the free list, or
4350 * for this purpose it looks like it
4351 * just came off of the free list
4352 */
4353 vm_page_free_prepare(m1);
4354
4355 /*
39236c6e
A
4356 * now put the substitute page
4357 * on the object
2d21ac55 4358 */
316670eb 4359 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
2d21ac55 4360
39236c6e
A
4361 if (m2->compressor) {
4362 m2->pmapped = TRUE;
4363 m2->wpmapped = TRUE;
2d21ac55 4364
39236c6e
A
4365 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4366 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4367#if MACH_ASSERT
4368 compressed_pages++;
4369#endif
4370 } else {
4371 if (m2->reference)
4372 vm_page_activate(m2);
4373 else
4374 vm_page_deactivate(m2);
4375 }
2d21ac55
A
4376 PAGE_WAKEUP_DONE(m2);
4377
4378 } else {
39236c6e
A
4379 assert(!m1->compressor);
4380
2d21ac55
A
4381 /*
4382 * completely cleans up the state
4383 * of the page so that it is ready
4384 * to be put onto the free list, or
4385 * for this purpose it looks like it
4386 * just came off of the free list
4387 */
4388 vm_page_free_prepare(m1);
4389 }
4390#if MACH_ASSERT
4391 stolen_pages++;
4392#endif
1c79356b 4393 }
2d21ac55
A
4394 m1->pageq.next = (queue_entry_t) m;
4395 m1->pageq.prev = NULL;
4396 m = m1;
e5568f75 4397 }
2d21ac55
A
4398 if (locked_object) {
4399 vm_object_unlock(locked_object);
4400 locked_object = VM_OBJECT_NULL;
1c79356b
A
4401 }
4402
2d21ac55
A
4403 if (abort_run == TRUE) {
4404 if (m != VM_PAGE_NULL) {
b0d623f7 4405 vm_page_free_list(m, FALSE);
2d21ac55
A
4406 }
4407#if MACH_ASSERT
4408 dumped_run++;
4409#endif
4410 /*
4411 * want the index of the last
4412 * page in this run that was
4413 * successfully 'stolen', so back
4414 * it up 1 for the auto-decrement on use
4415 * and 1 more to bump back over this page
4416 */
4417 page_idx = tmp_start_idx + 2;
b0d623f7
A
4418 if (page_idx >= vm_pages_count) {
4419 if (wrapped)
4420 goto done_scanning;
4421 page_idx = last_idx = 0;
4422 wrapped = TRUE;
4423 }
4424 abort_run = FALSE;
4425
2d21ac55 4426 /*
b0d623f7
A
 4427 * this run had to be aborted... reset the run
 4428 * state and resume the scan from the adjusted
 4429 * page_idx computed above
2d21ac55 4430 */
b0d623f7
A
4431 RESET_STATE_OF_RUN();
4432
4433 if( flags & KMA_LOMEM)
4434 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4435 else
4436 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4437
4438 last_idx = page_idx;
2d21ac55 4439
b0d623f7
A
4440 lck_mtx_lock(&vm_page_queue_free_lock);
4441 /*
4442 * reset our free page limit since we
4443 * dropped the lock protecting the vm_page_free_queue
4444 */
4445 free_available = vm_page_free_count - vm_page_free_reserved;
2d21ac55
A
4446 goto retry;
4447 }
e5568f75 4448
e5568f75 4449 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
2d21ac55
A
4450
4451 if (wire == TRUE)
4452 m1->wire_count++;
4453 else
4454 m1->gobbled = TRUE;
e5568f75 4455 }
2d21ac55
A
4456 if (wire == FALSE)
4457 vm_page_gobble_count += npages;
4458
4459 /*
4460 * gobbled pages are also counted as wired pages
4461 */
e5568f75 4462 vm_page_wire_count += npages;
e5568f75 4463
2d21ac55
A
4464 assert(vm_page_verify_contiguous(m, npages));
4465 }
4466done_scanning:
39236c6e
A
4467 PAGE_REPLACEMENT_ALLOWED(FALSE);
4468
2d21ac55
A
4469 vm_page_unlock_queues();
4470
593a1d5f 4471#if DEBUG
2d21ac55
A
4472 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4473
4474 tv_end_sec -= tv_start_sec;
4475 if (tv_end_usec < tv_start_usec) {
4476 tv_end_sec--;
4477 tv_end_usec += 1000000;
1c79356b 4478 }
2d21ac55
A
4479 tv_end_usec -= tv_start_usec;
4480 if (tv_end_usec >= 1000000) {
4481 tv_end_sec++;
 4482 tv_end_usec -= 1000000;
4483 }
b0d623f7 4484 if (vm_page_find_contig_debug) {
39236c6e
A
4485 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4486 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4487 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4488 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
b0d623f7 4489 }
e5568f75 4490
593a1d5f
A
4491#endif
4492#if MACH_ASSERT
2d21ac55
A
4493 vm_page_verify_free_lists();
4494#endif
e5568f75 4495 return m;
1c79356b
A
4496}
4497
4498/*
4499 * Allocate a list of contiguous, wired pages.
4500 */
4501kern_return_t
4502cpm_allocate(
4503 vm_size_t size,
4504 vm_page_t *list,
2d21ac55 4505 ppnum_t max_pnum,
b0d623f7
A
4506 ppnum_t pnum_mask,
4507 boolean_t wire,
4508 int flags)
1c79356b 4509{
91447636
A
4510 vm_page_t pages;
4511 unsigned int npages;
1c79356b 4512
6d2010ae 4513 if (size % PAGE_SIZE != 0)
1c79356b
A
4514 return KERN_INVALID_ARGUMENT;
4515
b0d623f7
A
4516 npages = (unsigned int) (size / PAGE_SIZE);
4517 if (npages != size / PAGE_SIZE) {
4518 /* 32-bit overflow */
4519 return KERN_INVALID_ARGUMENT;
4520 }
1c79356b 4521
1c79356b
A
4522 /*
4523 * Obtain a pointer to a subset of the free
4524 * list large enough to satisfy the request;
4525 * the region will be physically contiguous.
4526 */
b0d623f7 4527 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
e5568f75 4528
2d21ac55 4529 if (pages == VM_PAGE_NULL)
1c79356b 4530 return KERN_NO_SPACE;
1c79356b 4531 /*
2d21ac55 4532 * determine need for wakeups
1c79356b 4533 */
2d21ac55 4534 if ((vm_page_free_count < vm_page_free_min) ||
316670eb
A
4535 ((vm_page_free_count < vm_page_free_target) &&
4536 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4537 thread_wakeup((event_t) &vm_page_free_wanted);
2d21ac55 4538
6d2010ae
A
4539 VM_CHECK_MEMORYSTATUS;
4540
1c79356b
A
4541 /*
4542 * The CPM pages should now be available and
4543 * ordered by ascending physical address.
4544 */
4545 assert(vm_page_verify_contiguous(pages, npages));
4546
4547 *list = pages;
4548 return KERN_SUCCESS;
4549}
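/*
 * Illustrative caller sketch only (not part of this file): requesting a
 * physically contiguous, wired run of pages through cpm_allocate() and
 * walking the returned list with the accessors defined in this file.  The
 * function name example_grab_contiguous and its policy choices (no low-page
 * limit, no alignment mask, no KMA flags) are invented for this sketch; real
 * callers normally reach this path through kernel_memory_allocate()-style
 * interfaces rather than calling cpm_allocate() directly.
 */
static kern_return_t
example_grab_contiguous(vm_size_t size, vm_page_t *out_pages)
{
	vm_page_t	pages;
	vm_page_t	m;
	kern_return_t	kr;

	/* size must be a whole number of pages or cpm_allocate() rejects it */
	kr = cpm_allocate(size, &pages, 0 /* max_pnum */, 0 /* pnum_mask */,
			  TRUE /* wire */, 0 /* flags */);
	if (kr != KERN_SUCCESS)
		return kr;

	/* the list is ordered by ascending physical address */
	for (m = pages; m != VM_PAGE_NULL; m = vm_page_get_next(m))
		printf("contiguous page: phys 0x%x\n", vm_page_get_phys_page(m));

	*out_pages = pages;
	return KERN_SUCCESS;
}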
6d2010ae
A
4550
4551
4552unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4553
4554/*
4555 * when working on a 'run' of pages, it is necessary to hold
4556 * the vm_page_queue_lock (a hot global lock) for certain operations
4557 * on the page... however, the majority of the work can be done
4558 * while merely holding the object lock... in fact there are certain
4559 * collections of pages that don't require any work brokered by the
4560 * vm_page_queue_lock... to mitigate the time spent behind the global
4561 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4562 * while doing all of the work that doesn't require the vm_page_queue_lock...
4563 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4564 * necessary work for each page... we will grab the busy bit on the page
4565 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4566 * if it can't immediately take the vm_page_queue_lock in order to compete
4567 * for the locks in the same order that vm_pageout_scan takes them.
4568 * the operation names are modeled after the names of the routines that
4569 * need to be called in order to make the changes very obvious in the
4570 * original loop
4571 */
4572
4573void
4574vm_page_do_delayed_work(
4575 vm_object_t object,
4576 struct vm_page_delayed_work *dwp,
4577 int dw_count)
4578{
4579 int j;
4580 vm_page_t m;
4581 vm_page_t local_free_q = VM_PAGE_NULL;
6d2010ae
A
4582
4583 /*
4584 * pageout_scan takes the vm_page_lock_queues first
4585 * then tries for the object lock... to avoid what
4586 * is effectively a lock inversion, we'll go to the
4587 * trouble of taking them in that same order... otherwise
4588 * if this object contains the majority of the pages resident
4589 * in the UBC (or a small set of large objects actively being
4590 * worked on contain the majority of the pages), we could
4591 * cause the pageout_scan thread to 'starve' in its attempt
4592 * to find pages to move to the free queue, since it has to
4593 * successfully acquire the object lock of any candidate page
4594 * before it can steal/clean it.
4595 */
4596 if (!vm_page_trylockspin_queues()) {
4597 vm_object_unlock(object);
4598
4599 vm_page_lockspin_queues();
4600
4601 for (j = 0; ; j++) {
4602 if (!vm_object_lock_avoid(object) &&
4603 _vm_object_lock_try(object))
4604 break;
4605 vm_page_unlock_queues();
4606 mutex_pause(j);
4607 vm_page_lockspin_queues();
4608 }
6d2010ae
A
4609 }
4610 for (j = 0; j < dw_count; j++, dwp++) {
4611
4612 m = dwp->dw_m;
4613
6d2010ae
A
4614 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4615 vm_pageout_throttle_up(m);
4616
4617 if (dwp->dw_mask & DW_vm_page_wire)
4618 vm_page_wire(m);
4619 else if (dwp->dw_mask & DW_vm_page_unwire) {
4620 boolean_t queueit;
4621
4622 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4623
4624 vm_page_unwire(m, queueit);
4625 }
4626 if (dwp->dw_mask & DW_vm_page_free) {
4627 vm_page_free_prepare_queues(m);
4628
4629 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4630 /*
4631 * Add this page to our list of reclaimed pages,
4632 * to be freed later.
4633 */
4634 m->pageq.next = (queue_entry_t) local_free_q;
4635 local_free_q = m;
4636 } else {
4637 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4638 vm_page_deactivate_internal(m, FALSE);
4639 else if (dwp->dw_mask & DW_vm_page_activate) {
4640 if (m->active == FALSE) {
4641 vm_page_activate(m);
4642 }
4643 }
4644 else if (dwp->dw_mask & DW_vm_page_speculate)
4645 vm_page_speculate(m, TRUE);
316670eb
A
4646 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4647 /*
4648 * if we didn't hold the object lock and did this,
4649 * we might disconnect the page, then someone might
4650 * soft fault it back in, then we would put it on the
4651 * cleaned queue, and so we would have a referenced (maybe even dirty)
4652 * page on that queue, which we don't want
4653 */
4654 int refmod_state = pmap_disconnect(m->phys_page);
4655
4656 if ((refmod_state & VM_MEM_REFERENCED)) {
4657 /*
4658 * this page has been touched since it got cleaned; let's activate it
4659 * if it hasn't already been
4660 */
4661 vm_pageout_enqueued_cleaned++;
4662 vm_pageout_cleaned_reactivated++;
4663 vm_pageout_cleaned_commit_reactivated++;
4664
4665 if (m->active == FALSE)
4666 vm_page_activate(m);
4667 } else {
4668 m->reference = FALSE;
4669 vm_page_enqueue_cleaned(m);
4670 }
4671 }
6d2010ae
A
4672 else if (dwp->dw_mask & DW_vm_page_lru)
4673 vm_page_lru(m);
316670eb
A
4674 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4675 if ( !m->pageout_queue)
4676 VM_PAGE_QUEUES_REMOVE(m);
4677 }
6d2010ae
A
4678 if (dwp->dw_mask & DW_set_reference)
4679 m->reference = TRUE;
4680 else if (dwp->dw_mask & DW_clear_reference)
4681 m->reference = FALSE;
4682
4683 if (dwp->dw_mask & DW_move_page) {
316670eb
A
4684 if ( !m->pageout_queue) {
4685 VM_PAGE_QUEUES_REMOVE(m);
6d2010ae 4686
316670eb 4687 assert(m->object != kernel_object);
6d2010ae 4688
316670eb
A
4689 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4690 }
6d2010ae
A
4691 }
4692 if (dwp->dw_mask & DW_clear_busy)
4693 m->busy = FALSE;
4694
4695 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4696 PAGE_WAKEUP(m);
4697 }
4698 }
4699 vm_page_unlock_queues();
4700
4701 if (local_free_q)
4702 vm_page_free_list(local_free_q, TRUE);
4703
4704 VM_CHECK_MEMORYSTATUS;
4705
4706}
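/*
 * Illustrative caller sketch only (not part of this file): the two-pass
 * pattern described in the comment above vm_page_do_delayed_work().  While
 * holding just the object lock, the caller records the queue-related work it
 * wants for each page in a local array of vm_page_delayed_work entries, then
 * hands the whole batch over so the hot vm_page_queue_lock is taken once per
 * batch instead of once per page.  The batch size, the helper name
 * example_free_batch, and the assumption that every page in page_array
 * belongs to 'object', is busy, and is free-able are all invented for this
 * sketch; real callers use the delayed-work helper macros and
 * vm_max_delayed_work_limit for this bookkeeping.
 */
#define EXAMPLE_DW_LIMIT	32		/* illustrative batch size */

static void
example_free_batch(vm_object_t object, vm_page_t *page_array, int npages)
{
	struct vm_page_delayed_work	dw_array[EXAMPLE_DW_LIMIT];
	int				dw_count = 0;
	int				i;

	vm_object_lock(object);

	for (i = 0; i < npages; i++) {
		/* per-page work that needs only the object lock would go here */

		dw_array[dw_count].dw_m = page_array[i];
		dw_array[dw_count].dw_mask = DW_vm_page_free;

		if (++dw_count == EXAMPLE_DW_LIMIT) {
			/* takes the vm_page_queue_lock once for the whole batch */
			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);

	vm_object_unlock(object);
}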
4707
0b4c1975
A
4708kern_return_t
4709vm_page_alloc_list(
4710 int page_count,
4711 int flags,
4712 vm_page_t *list)
4713{
4714 vm_page_t lo_page_list = VM_PAGE_NULL;
4715 vm_page_t mem;
4716 int i;
4717
4718 if ( !(flags & KMA_LOMEM))
4719 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4720
4721 for (i = 0; i < page_count; i++) {
4722
4723 mem = vm_page_grablo();
4724
4725 if (mem == VM_PAGE_NULL) {
4726 if (lo_page_list)
4727 vm_page_free_list(lo_page_list, FALSE);
4728
4729 *list = VM_PAGE_NULL;
4730
4731 return (KERN_RESOURCE_SHORTAGE);
4732 }
4733 mem->pageq.next = (queue_entry_t) lo_page_list;
4734 lo_page_list = mem;
4735 }
4736 *list = lo_page_list;
4737
4738 return (KERN_SUCCESS);
4739}
4740
4741void
4742vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4743{
4744 page->offset = offset;
4745}
4746
4747vm_page_t
4748vm_page_get_next(vm_page_t page)
4749{
4750 return ((vm_page_t) page->pageq.next);
4751}
4752
4753vm_object_offset_t
4754vm_page_get_offset(vm_page_t page)
4755{
4756 return (page->offset);
4757}
4758
4759ppnum_t
4760vm_page_get_phys_page(vm_page_t page)
4761{
4762 return (page->phys_page);
4763}
4764
4765
b0d623f7
A
4766/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4767
d1ecb069
A
4768#if HIBERNATION
4769
b0d623f7
A
4770static vm_page_t hibernate_gobble_queue;
4771
0b4c1975
A
4772extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4773
4774static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
39236c6e 4775static int hibernate_flush_dirty_pages(int);
0b4c1975 4776static int hibernate_flush_queue(queue_head_t *, int);
0b4c1975
A
4777
4778void hibernate_flush_wait(void);
4779void hibernate_mark_in_progress(void);
4780void hibernate_clear_in_progress(void);
4781
39236c6e
A
4782void hibernate_free_range(int, int);
4783void hibernate_hash_insert_page(vm_page_t);
4784uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
4785void hibernate_rebuild_vm_structs(void);
4786uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
4787ppnum_t hibernate_lookup_paddr(unsigned int);
0b4c1975
A
4788
4789struct hibernate_statistics {
4790 int hibernate_considered;
4791 int hibernate_reentered_on_q;
4792 int hibernate_found_dirty;
4793 int hibernate_skipped_cleaning;
4794 int hibernate_skipped_transient;
4795 int hibernate_skipped_precious;
39236c6e 4796 int hibernate_skipped_external;
0b4c1975
A
4797 int hibernate_queue_nolock;
4798 int hibernate_queue_paused;
4799 int hibernate_throttled;
4800 int hibernate_throttle_timeout;
4801 int hibernate_drained;
4802 int hibernate_drain_timeout;
4803 int cd_lock_failed;
4804 int cd_found_precious;
4805 int cd_found_wired;
4806 int cd_found_busy;
4807 int cd_found_unusual;
4808 int cd_found_cleaning;
4809 int cd_found_laundry;
4810 int cd_found_dirty;
39236c6e 4811 int cd_found_xpmapped;
0b4c1975
A
4812 int cd_local_free;
4813 int cd_total_free;
4814 int cd_vm_page_wire_count;
39236c6e 4815 int cd_vm_struct_pages_unneeded;
0b4c1975
A
4816 int cd_pages;
4817 int cd_discarded;
4818 int cd_count_wire;
4819} hibernate_stats;
4820
4821
4822
4823static int
4824hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4825{
4826 wait_result_t wait_result;
4827
4828 vm_page_lock_queues();
4829
39236c6e 4830 while ( !queue_empty(&q->pgo_pending) ) {
0b4c1975
A
4831
4832 q->pgo_draining = TRUE;
4833
4834 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4835
4836 vm_page_unlock_queues();
4837
4838 wait_result = thread_block(THREAD_CONTINUE_NULL);
4839
39236c6e 4840 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
0b4c1975 4841 hibernate_stats.hibernate_drain_timeout++;
39236c6e
A
4842
4843 if (q == &vm_pageout_queue_external)
4844 return (0);
4845
0b4c1975
A
4846 return (1);
4847 }
4848 vm_page_lock_queues();
4849
4850 hibernate_stats.hibernate_drained++;
4851 }
4852 vm_page_unlock_queues();
4853
4854 return (0);
4855}
4856
0b4c1975 4857
39236c6e
A
4858boolean_t hibernate_skip_external = FALSE;
4859
0b4c1975
A
4860static int
4861hibernate_flush_queue(queue_head_t *q, int qcount)
4862{
4863 vm_page_t m;
4864 vm_object_t l_object = NULL;
4865 vm_object_t m_object = NULL;
4866 int refmod_state = 0;
4867 int try_failed_count = 0;
4868 int retval = 0;
4869 int current_run = 0;
4870 struct vm_pageout_queue *iq;
4871 struct vm_pageout_queue *eq;
4872 struct vm_pageout_queue *tq;
4873
4874
4875 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4876
4877 iq = &vm_pageout_queue_internal;
4878 eq = &vm_pageout_queue_external;
4879
4880 vm_page_lock_queues();
4881
4882 while (qcount && !queue_empty(q)) {
4883
4884 if (current_run++ == 1000) {
4885 if (hibernate_should_abort()) {
4886 retval = 1;
4887 break;
4888 }
4889 current_run = 0;
4890 }
4891
4892 m = (vm_page_t) queue_first(q);
4893 m_object = m->object;
4894
4895 /*
4896 * check to see if we currently are working
4897 * with the same object... if so, we've
4898 * already got the lock
4899 */
4900 if (m_object != l_object) {
4901 /*
 4902 * the object associated with the candidate page is
4903 * different from the one we were just working
4904 * with... dump the lock if we still own it
4905 */
4906 if (l_object != NULL) {
4907 vm_object_unlock(l_object);
4908 l_object = NULL;
4909 }
4910 /*
 4911 * Try to lock object; since we've already got the
4912 * page queues lock, we can only 'try' for this one.
4913 * if the 'try' fails, we need to do a mutex_pause
4914 * to allow the owner of the object lock a chance to
4915 * run...
4916 */
4917 if ( !vm_object_lock_try_scan(m_object)) {
4918
4919 if (try_failed_count > 20) {
4920 hibernate_stats.hibernate_queue_nolock++;
4921
4922 goto reenter_pg_on_q;
4923 }
4924 vm_pageout_scan_wants_object = m_object;
4925
4926 vm_page_unlock_queues();
4927 mutex_pause(try_failed_count++);
4928 vm_page_lock_queues();
4929
4930 hibernate_stats.hibernate_queue_paused++;
4931 continue;
4932 } else {
4933 l_object = m_object;
4934 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4935 }
4936 }
316670eb 4937 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
0b4c1975
A
4938 /*
4939 * page is not to be cleaned
4940 * put it back on the head of its queue
4941 */
4942 if (m->cleaning)
4943 hibernate_stats.hibernate_skipped_cleaning++;
4944 else
4945 hibernate_stats.hibernate_skipped_transient++;
4946
4947 goto reenter_pg_on_q;
4948 }
0b4c1975
A
4949 if (m_object->copy == VM_OBJECT_NULL) {
4950 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4951 /*
4952 * let the normal hibernate image path
4953 * deal with these
4954 */
4955 goto reenter_pg_on_q;
4956 }
4957 }
4958 if ( !m->dirty && m->pmapped) {
4959 refmod_state = pmap_get_refmod(m->phys_page);
4960
316670eb
A
4961 if ((refmod_state & VM_MEM_MODIFIED)) {
4962 SET_PAGE_DIRTY(m, FALSE);
4963 }
0b4c1975
A
4964 } else
4965 refmod_state = 0;
4966
4967 if ( !m->dirty) {
4968 /*
4969 * page is not to be cleaned
4970 * put it back on the head of its queue
4971 */
4972 if (m->precious)
4973 hibernate_stats.hibernate_skipped_precious++;
4974
4975 goto reenter_pg_on_q;
4976 }
39236c6e
A
4977
4978 if (hibernate_skip_external == TRUE && !m_object->internal) {
4979
4980 hibernate_stats.hibernate_skipped_external++;
4981
4982 goto reenter_pg_on_q;
4983 }
0b4c1975
A
4984 tq = NULL;
4985
4986 if (m_object->internal) {
4987 if (VM_PAGE_Q_THROTTLED(iq))
4988 tq = iq;
4989 } else if (VM_PAGE_Q_THROTTLED(eq))
4990 tq = eq;
4991
4992 if (tq != NULL) {
4993 wait_result_t wait_result;
4994 int wait_count = 5;
4995
4996 if (l_object != NULL) {
4997 vm_object_unlock(l_object);
4998 l_object = NULL;
4999 }
5000 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5001
0b4c1975
A
5002 while (retval == 0) {
5003
39236c6e
A
5004 tq->pgo_throttled = TRUE;
5005
0b4c1975
A
5006 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
5007
316670eb 5008 vm_page_unlock_queues();
0b4c1975 5009
316670eb 5010 wait_result = thread_block(THREAD_CONTINUE_NULL);
0b4c1975
A
5011
5012 vm_page_lock_queues();
5013
39236c6e
A
5014 if (wait_result != THREAD_TIMED_OUT)
5015 break;
5016 if (!VM_PAGE_Q_THROTTLED(tq))
5017 break;
5018
0b4c1975
A
5019 if (hibernate_should_abort())
5020 retval = 1;
5021
0b4c1975 5022 if (--wait_count == 0) {
39236c6e 5023
316670eb 5024 hibernate_stats.hibernate_throttle_timeout++;
39236c6e
A
5025
5026 if (tq == eq) {
5027 hibernate_skip_external = TRUE;
5028 break;
5029 }
316670eb
A
5030 retval = 1;
5031 }
0b4c1975
A
5032 }
5033 if (retval)
5034 break;
5035
5036 hibernate_stats.hibernate_throttled++;
5037
5038 continue;
5039 }
316670eb
A
5040 /*
5041 * we've already factored out pages in the laundry which
5042 * means this page can't be on the pageout queue so it's
5043 * safe to do the VM_PAGE_QUEUES_REMOVE
5044 */
5045 assert(!m->pageout_queue);
5046
0b4c1975
A
5047 VM_PAGE_QUEUES_REMOVE(m);
5048
39236c6e
A
5049 if (COMPRESSED_PAGER_IS_ACTIVE)
5050 pmap_disconnect(m->phys_page);
5051
316670eb 5052 vm_pageout_cluster(m, FALSE);
0b4c1975
A
5053
5054 hibernate_stats.hibernate_found_dirty++;
5055
5056 goto next_pg;
5057
5058reenter_pg_on_q:
5059 queue_remove(q, m, vm_page_t, pageq);
5060 queue_enter(q, m, vm_page_t, pageq);
5061
5062 hibernate_stats.hibernate_reentered_on_q++;
5063next_pg:
5064 hibernate_stats.hibernate_considered++;
5065
5066 qcount--;
5067 try_failed_count = 0;
5068 }
5069 if (l_object != NULL) {
5070 vm_object_unlock(l_object);
5071 l_object = NULL;
5072 }
316670eb 5073 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
0b4c1975
A
5074
5075 vm_page_unlock_queues();
5076
5077 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5078
5079 return (retval);
5080}
5081
5082
5083static int
39236c6e 5084hibernate_flush_dirty_pages(int pass)
0b4c1975
A
5085{
5086 struct vm_speculative_age_q *aq;
5087 uint32_t i;
5088
5089 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5090
5091 if (vm_page_local_q) {
5092 for (i = 0; i < vm_page_local_q_count; i++)
5093 vm_page_reactivate_local(i, TRUE, FALSE);
5094 }
5095
5096 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5097 int qcount;
5098 vm_page_t m;
5099
5100 aq = &vm_page_queue_speculative[i];
5101
5102 if (queue_empty(&aq->age_q))
5103 continue;
5104 qcount = 0;
5105
5106 vm_page_lockspin_queues();
5107
5108 queue_iterate(&aq->age_q,
5109 m,
5110 vm_page_t,
5111 pageq)
5112 {
5113 qcount++;
5114 }
5115 vm_page_unlock_queues();
5116
5117 if (qcount) {
5118 if (hibernate_flush_queue(&aq->age_q, qcount))
5119 return (1);
5120 }
5121 }
316670eb 5122 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
0b4c1975 5123 return (1);
316670eb
A
5124 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5125 return (1);
5126 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
0b4c1975 5127 return (1);
0b4c1975
A
5128 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5129 return (1);
0b4c1975 5130
39236c6e
A
5131 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5132 vm_compressor_record_warmup_start();
5133
5134 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5135 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5136 vm_compressor_record_warmup_end();
5137 return (1);
5138 }
5139 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5140 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5141 vm_compressor_record_warmup_end();
5142 return (1);
5143 }
5144 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5145 vm_compressor_record_warmup_end();
5146
5147 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5148 return (1);
5149
5150 return (0);
5151}
0b4c1975 5152
0b4c1975
A
5153
5154int
5155hibernate_flush_memory()
5156{
5157 int retval;
5158
5159 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5160
39236c6e
A
5161 hibernate_cleaning_in_progress = TRUE;
5162 hibernate_skip_external = FALSE;
5163
5164 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5165
5166 if (COMPRESSED_PAGER_IS_ACTIVE) {
0b4c1975 5167
39236c6e 5168 if ((retval = hibernate_flush_dirty_pages(2)) == 0) {
0b4c1975 5169
39236c6e 5170 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
0b4c1975 5171
39236c6e
A
5172 vm_compressor_flush();
5173
5174 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5175 }
5176 }
5177 if (retval == 0 && consider_buffer_cache_collect != NULL) {
5178 unsigned int orig_wire_count;
5179
5180 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5181 orig_wire_count = vm_page_wire_count;
0b4c1975 5182
0b4c1975 5183 (void)(*consider_buffer_cache_collect)(1);
7ddcb079 5184 consider_zone_gc(TRUE);
0b4c1975 5185
39236c6e
A
5186 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5187
5188 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
0b4c1975
A
5189 }
5190 }
39236c6e
A
5191 hibernate_cleaning_in_progress = FALSE;
5192
0b4c1975
A
5193 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5194
39236c6e
A
5195 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5196 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5197
5198
0b4c1975
A
5199 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5200 hibernate_stats.hibernate_considered,
5201 hibernate_stats.hibernate_reentered_on_q,
5202 hibernate_stats.hibernate_found_dirty);
39236c6e 5203 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
0b4c1975
A
5204 hibernate_stats.hibernate_skipped_cleaning,
5205 hibernate_stats.hibernate_skipped_transient,
5206 hibernate_stats.hibernate_skipped_precious,
39236c6e 5207 hibernate_stats.hibernate_skipped_external,
0b4c1975
A
5208 hibernate_stats.hibernate_queue_nolock);
5209 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5210 hibernate_stats.hibernate_queue_paused,
5211 hibernate_stats.hibernate_throttled,
5212 hibernate_stats.hibernate_throttle_timeout,
5213 hibernate_stats.hibernate_drained,
5214 hibernate_stats.hibernate_drain_timeout);
5215
5216 return (retval);
5217}
5218
6d2010ae 5219
b0d623f7
A
5220static void
5221hibernate_page_list_zero(hibernate_page_list_t *list)
5222{
5223 uint32_t bank;
5224 hibernate_bitmap_t * bitmap;
5225
5226 bitmap = &list->bank_bitmap[0];
5227 for (bank = 0; bank < list->bank_count; bank++)
5228 {
5229 uint32_t last_bit;
5230
5231 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5232 // set out-of-bound bits at end of bitmap.
5233 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5234 if (last_bit)
5235 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
5236
5237 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5238 }
5239}
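/*
 * Illustrative sketch only (not part of this file): marking one physical
 * page as "does not need saving" in the bank/bitmap layout zeroed above.
 * A zero bit means the page must be written to the image, so discarding a
 * page amounts to setting its bit; the MSB-first ordering within each 32-bit
 * word is inferred from the out-of-bound fill in hibernate_page_list_zero().
 * The name example_mark_page_unneeded is invented here; the real code goes
 * through the hibernate bitmap helpers.
 */
static void
example_mark_page_unneeded(hibernate_page_list_t *list, uint32_t pn)
{
	uint32_t		bank;
	hibernate_bitmap_t	*bitmap;

	bitmap = &list->bank_bitmap[0];
	for (bank = 0; bank < list->bank_count; bank++)
	{
		if (pn >= bitmap->first_page && pn <= bitmap->last_page)
		{
			uint32_t bit = pn - bitmap->first_page;

			/* set bit => page will not be saved in the hibernation image */
			bitmap->bitmap[bit >> 5] |= (0x80000000 >> (bit & 31));
			return;
		}
		/* banks are laid out back to back, as in hibernate_page_list_zero() */
		bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
	}
}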
5240
5241void
5242hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
5243{
5244 uint32_t i;
5245 vm_page_t m;
5246 uint64_t start, end, timeout, nsec;
5247 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
5248 clock_get_uptime(&start);
5249
5250 for (i = 0; i < gobble_count; i++)
5251 {
5252 while (VM_PAGE_NULL == (m = vm_page_grab()))
5253 {
5254 clock_get_uptime(&end);
5255 if (end >= timeout)
5256 break;
5257 VM_PAGE_WAIT();
5258 }
5259 if (!m)
5260 break;
5261 m->busy = FALSE;
5262 vm_page_gobble(m);
5263
5264 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
5265 hibernate_gobble_queue = m;
5266 }
5267
5268 clock_get_uptime(&end);
5269 absolutetime_to_nanoseconds(end - start, &nsec);
5270 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
5271}
5272
5273void
5274hibernate_free_gobble_pages(void)
5275{
5276 vm_page_t m, next;
5277 uint32_t count = 0;
5278
5279 m = (vm_page_t) hibernate_gobble_queue;
5280 while(m)
5281 {
5282 next = (vm_page_t) m->pageq.next;
5283 vm_page_free(m);
5284 count++;
5285 m = next;
5286 }
5287 hibernate_gobble_queue = VM_PAGE_NULL;
5288
5289 if (count)
5290 HIBLOG("Freed %d pages\n", count);
5291}
5292
5293static boolean_t
db609669 5294hibernate_consider_discard(vm_page_t m, boolean_t preflight)
b0d623f7
A
5295{
5296 vm_object_t object = NULL;
5297 int refmod_state;
5298 boolean_t discard = FALSE;
5299
5300 do
5301 {
0b4c1975 5302 if (m->private)
b0d623f7
A
5303 panic("hibernate_consider_discard: private");
5304
0b4c1975 5305 if (!vm_object_lock_try(m->object)) {
db609669 5306 if (!preflight) hibernate_stats.cd_lock_failed++;
b0d623f7 5307 break;
0b4c1975 5308 }
b0d623f7
A
5309 object = m->object;
5310
0b4c1975 5311 if (VM_PAGE_WIRED(m)) {
db609669 5312 if (!preflight) hibernate_stats.cd_found_wired++;
b0d623f7 5313 break;
0b4c1975
A
5314 }
5315 if (m->precious) {
db609669 5316 if (!preflight) hibernate_stats.cd_found_precious++;
b0d623f7 5317 break;
0b4c1975
A
5318 }
5319 if (m->busy || !object->alive) {
b0d623f7
A
5320 /*
5321 * Somebody is playing with this page.
5322 */
db609669 5323 if (!preflight) hibernate_stats.cd_found_busy++;
6d2010ae 5324 break;
0b4c1975
A
5325 }
5326 if (m->absent || m->unusual || m->error) {
b0d623f7
A
5327 /*
 5328 * If it's unusual in any way, ignore it
5329 */
db609669 5330 if (!preflight) hibernate_stats.cd_found_unusual++;
b0d623f7 5331 break;
0b4c1975
A
5332 }
5333 if (m->cleaning) {
db609669 5334 if (!preflight) hibernate_stats.cd_found_cleaning++;
b0d623f7 5335 break;
0b4c1975 5336 }
316670eb 5337 if (m->laundry) {
db609669 5338 if (!preflight) hibernate_stats.cd_found_laundry++;
b0d623f7 5339 break;
0b4c1975 5340 }
b0d623f7
A
5341 if (!m->dirty)
5342 {
5343 refmod_state = pmap_get_refmod(m->phys_page);
5344
5345 if (refmod_state & VM_MEM_REFERENCED)
5346 m->reference = TRUE;
316670eb
A
5347 if (refmod_state & VM_MEM_MODIFIED) {
5348 SET_PAGE_DIRTY(m, FALSE);
5349 }
b0d623f7
A
5350 }
5351
5352 /*
5353 * If it's clean or purgeable we can discard the page on wakeup.
5354 */
5355 discard = (!m->dirty)
5356 || (VM_PURGABLE_VOLATILE == object->purgable)
0b4c1975
A
5357 || (VM_PURGABLE_EMPTY == object->purgable);
5358
39236c6e
A
5359
5360 if (discard == FALSE) {
5361 if (!preflight)
5362 hibernate_stats.cd_found_dirty++;
5363 } else if (m->xpmapped && m->reference) {
5364 if (!preflight)
5365 hibernate_stats.cd_found_xpmapped++;
5366 discard = FALSE;
5367 }
b0d623f7
A
5368 }
5369 while (FALSE);
5370
5371 if (object)
5372 vm_object_unlock(object);
5373
5374 return (discard);
5375}
5376
5377
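/*
 * Free a page that hibernate_consider_discard() approved: disconnect any
 * pmap mappings and, if the owning object is volatile purgeable, pull the
 * object off its purgeable queue (deleting a token when purgeable_when_ripe)
 * and mark it empty before freeing the page.
 */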
5378static void
5379hibernate_discard_page(vm_page_t m)
5380{
5381 if (m->absent || m->unusual || m->error)
5382 /*
5383 * If it's unusual in any way, ignore it
5384 */
5385 return;
5386
316670eb
A
5387#if DEBUG
5388 vm_object_t object = m->object;
5389 if (!vm_object_lock_try(m->object))
5390 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5391#else
5392 /* No need to lock the page queue for the token delete; hibernate_vm_unlock()
5393 makes sure these locks are uncontended before sleep */
5394#endif /* !DEBUG */
5395
b0d623f7
A
5396 if (m->pmapped == TRUE)
5397 {
5398 __unused int refmod_state = pmap_disconnect(m->phys_page);
5399 }
5400
5401 if (m->laundry)
5402 panic("hibernate_discard_page(%p) laundry", m);
5403 if (m->private)
5404 panic("hibernate_discard_page(%p) private", m);
5405 if (m->fictitious)
5406 panic("hibernate_discard_page(%p) fictitious", m);
5407
5408 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5409 {
5410 /* object should be on a queue */
5411 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5412 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5413 assert(old_queue);
39236c6e
A
5414 if (m->object->purgeable_when_ripe) {
5415 vm_purgeable_token_delete_first(old_queue);
5416 }
b0d623f7
A
5417 m->object->purgable = VM_PURGABLE_EMPTY;
5418 }
5419
5420 vm_page_free(m);
316670eb
A
5421
5422#if DEBUG
5423 vm_object_unlock(object);
5424#endif /* DEBUG */
b0d623f7
A
5425}
5426
db609669
A
5427/*
5428 Grab locks for hibernate_page_list_setall()
5429*/
5430void
5431hibernate_vm_lock_queues(void)
5432{
39236c6e 5433 vm_object_lock(compressor_object);
db609669
A
5434 vm_page_lock_queues();
5435 lck_mtx_lock(&vm_page_queue_free_lock);
5436
5437 if (vm_page_local_q) {
5438 uint32_t i;
5439 for (i = 0; i < vm_page_local_q_count; i++) {
5440 struct vpl *lq;
5441 lq = &vm_page_local_q[i].vpl_un.vpl;
5442 VPL_LOCK(&lq->vpl_lock);
5443 }
5444 }
5445}
5446
5447void
5448hibernate_vm_unlock_queues(void)
5449{
5450 if (vm_page_local_q) {
5451 uint32_t i;
5452 for (i = 0; i < vm_page_local_q_count; i++) {
5453 struct vpl *lq;
5454 lq = &vm_page_local_q[i].vpl_un.vpl;
5455 VPL_UNLOCK(&lq->vpl_lock);
5456 }
5457 }
5458 lck_mtx_unlock(&vm_page_queue_free_lock);
5459 vm_page_unlock_queues();
39236c6e 5460 vm_object_unlock(compressor_object);
db609669
A
5461}
5462
b0d623f7
A
5463/*
5464 A zero bit in the bitmaps means the page needs to be saved. All pages default to being saved;
5465 pages known to the VM not to need saving are subtracted.
5466 Wired pages to be saved are present in page_list_wired, pageable pages in page_list.
5467*/
5468
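/*
 * The pass below walks the gobbled, per-cpu free, global free, lopage free,
 * throttled, anonymous, inactive, cleaned, speculative, active and compressor
 * queues, counting pages and setting bitmap bits for pages that do not need
 * saving.  With preflight set only the counts are produced (no bitmaps are
 * touched); when will_discard is set on a non-preflight pass, discardable
 * pages are freed immediately and hibernate_teardown_vm_structs() releases
 * redundant VM bookkeeping as well.
 */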
5469void
5470hibernate_page_list_setall(hibernate_page_list_t * page_list,
5471 hibernate_page_list_t * page_list_wired,
6d2010ae 5472 hibernate_page_list_t * page_list_pal,
39236c6e
A
5473 boolean_t preflight,
5474 boolean_t will_discard,
b0d623f7
A
5475 uint32_t * pagesOut)
5476{
5477 uint64_t start, end, nsec;
5478 vm_page_t m;
39236c6e 5479 vm_page_t next;
b0d623f7 5480 uint32_t pages = page_list->page_count;
39236c6e 5481 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
316670eb 5482 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
b0d623f7
A
5483 uint32_t count_wire = pages;
5484 uint32_t count_discard_active = 0;
5485 uint32_t count_discard_inactive = 0;
316670eb 5486 uint32_t count_discard_cleaned = 0;
b0d623f7
A
5487 uint32_t count_discard_purgeable = 0;
5488 uint32_t count_discard_speculative = 0;
39236c6e 5489 uint32_t count_discard_vm_struct_pages = 0;
b0d623f7
A
5490 uint32_t i;
5491 uint32_t bank;
5492 hibernate_bitmap_t * bitmap;
5493 hibernate_bitmap_t * bitmap_wired;
39236c6e
A
5494 boolean_t discard_all;
5495 boolean_t discard;
b0d623f7 5496
db609669 5497 HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
b0d623f7 5498
db609669
A
5499 if (preflight) {
5500 page_list = NULL;
5501 page_list_wired = NULL;
5502 page_list_pal = NULL;
39236c6e
A
5503 discard_all = FALSE;
5504 } else {
5505 discard_all = will_discard;
db609669 5506 }
0b4c1975 5507
316670eb 5508#if DEBUG
39236c6e
A
5509 if (!preflight)
5510 {
316670eb
A
5511 vm_page_lock_queues();
5512 if (vm_page_local_q) {
5513 for (i = 0; i < vm_page_local_q_count; i++) {
5514 struct vpl *lq;
5515 lq = &vm_page_local_q[i].vpl_un.vpl;
5516 VPL_LOCK(&lq->vpl_lock);
5517 }
5518 }
39236c6e 5519 }
316670eb
A
5520#endif /* DEBUG */
5521
5522
0b4c1975 5523 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
b0d623f7
A
5524
5525 clock_get_uptime(&start);
5526
db609669
A
5527 if (!preflight) {
5528 hibernate_page_list_zero(page_list);
5529 hibernate_page_list_zero(page_list_wired);
5530 hibernate_page_list_zero(page_list_pal);
5531
5532 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5533 hibernate_stats.cd_pages = pages;
5534 }
0b4c1975 5535
b0d623f7
A
5536 if (vm_page_local_q) {
5537 for (i = 0; i < vm_page_local_q_count; i++)
db609669
A
5538 vm_page_reactivate_local(i, TRUE, !preflight);
5539 }
5540
5541 if (preflight) {
39236c6e 5542 vm_object_lock(compressor_object);
db609669
A
5543 vm_page_lock_queues();
5544 lck_mtx_lock(&vm_page_queue_free_lock);
b0d623f7
A
5545 }
5546
5547 m = (vm_page_t) hibernate_gobble_queue;
39236c6e 5548 while (m)
b0d623f7
A
5549 {
5550 pages--;
5551 count_wire--;
db609669
A
5552 if (!preflight) {
5553 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5554 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5555 }
b0d623f7
A
5556 m = (vm_page_t) m->pageq.next;
5557 }
6d2010ae 5558
db609669 5559 if (!preflight) for( i = 0; i < real_ncpus; i++ )
0b4c1975
A
5560 {
5561 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5562 {
5563 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5564 {
5565 pages--;
5566 count_wire--;
5567 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5568 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5569
5570 hibernate_stats.cd_local_free++;
5571 hibernate_stats.cd_total_free++;
5572 }
5573 }
5574 }
6d2010ae 5575
b0d623f7
A
5576 for( i = 0; i < vm_colors; i++ )
5577 {
5578 queue_iterate(&vm_page_queue_free[i],
5579 m,
5580 vm_page_t,
5581 pageq)
5582 {
5583 pages--;
5584 count_wire--;
db609669
A
5585 if (!preflight) {
5586 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5587 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5588
5589 hibernate_stats.cd_total_free++;
5590 }
b0d623f7
A
5591 }
5592 }
5593
5594 queue_iterate(&vm_lopage_queue_free,
5595 m,
5596 vm_page_t,
5597 pageq)
5598 {
5599 pages--;
5600 count_wire--;
db609669
A
5601 if (!preflight) {
5602 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5603 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5604
5605 hibernate_stats.cd_total_free++;
5606 }
b0d623f7
A
5607 }
5608
39236c6e
A
5609 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5610 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
b0d623f7 5611 {
39236c6e
A
5612 next = (vm_page_t) m->pageq.next;
5613 discard = FALSE;
b0d623f7 5614 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 5615 && hibernate_consider_discard(m, preflight))
b0d623f7 5616 {
db609669 5617 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
b0d623f7 5618 count_discard_inactive++;
39236c6e 5619 discard = discard_all;
b0d623f7
A
5620 }
5621 else
5622 count_throttled++;
5623 count_wire--;
db609669 5624 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
39236c6e
A
5625
5626 if (discard) hibernate_discard_page(m);
5627 m = next;
b0d623f7
A
5628 }
5629
39236c6e
A
5630 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5631 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
b0d623f7 5632 {
39236c6e
A
5633 next = (vm_page_t) m->pageq.next;
5634 discard = FALSE;
b0d623f7 5635 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 5636 && hibernate_consider_discard(m, preflight))
b0d623f7 5637 {
db609669 5638 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
b0d623f7
A
5639 if (m->dirty)
5640 count_discard_purgeable++;
5641 else
5642 count_discard_inactive++;
39236c6e 5643 discard = discard_all;
b0d623f7
A
5644 }
5645 else
39236c6e 5646 count_anonymous++;
b0d623f7 5647 count_wire--;
db609669 5648 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
39236c6e
A
5649 if (discard) hibernate_discard_page(m);
5650 m = next;
b0d623f7
A
5651 }
5652
39236c6e
A
5653 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5654 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
b0d623f7 5655 {
39236c6e
A
5656 next = (vm_page_t) m->pageq.next;
5657 discard = FALSE;
b0d623f7 5658 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 5659 && hibernate_consider_discard(m, preflight))
b0d623f7 5660 {
db609669 5661 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
b0d623f7
A
5662 if (m->dirty)
5663 count_discard_purgeable++;
5664 else
5665 count_discard_inactive++;
39236c6e 5666 discard = discard_all;
b0d623f7
A
5667 }
5668 else
5669 count_inactive++;
5670 count_wire--;
db609669 5671 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
39236c6e
A
5672 if (discard) hibernate_discard_page(m);
5673 m = next;
b0d623f7
A
5674 }
5675
39236c6e
A
5676 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5677 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
316670eb 5678 {
39236c6e
A
5679 next = (vm_page_t) m->pageq.next;
5680 discard = FALSE;
316670eb 5681 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
db609669 5682 && hibernate_consider_discard(m, preflight))
316670eb 5683 {
db609669 5684 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
316670eb
A
5685 if (m->dirty)
5686 count_discard_purgeable++;
5687 else
5688 count_discard_cleaned++;
39236c6e 5689 discard = discard_all;
316670eb
A
5690 }
5691 else
5692 count_cleaned++;
5693 count_wire--;
db609669 5694 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
39236c6e
A
5695 if (discard) hibernate_discard_page(m);
5696 m = next;
316670eb
A
5697 }
5698
b0d623f7
A
5699 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5700 {
39236c6e
A
5701 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5702 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5703 {
5704 next = (vm_page_t) m->pageq.next;
5705 discard = FALSE;
5706 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5707 && hibernate_consider_discard(m, preflight))
5708 {
5709 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5710 count_discard_speculative++;
5711 discard = discard_all;
5712 }
5713 else
5714 count_speculative++;
5715 count_wire--;
5716 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5717 if (discard) hibernate_discard_page(m);
5718 m = next;
5719 }
b0d623f7
A
5720 }
5721
39236c6e
A
5722 m = (vm_page_t) queue_first(&vm_page_queue_active);
5723 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
b0d623f7 5724 {
39236c6e
A
5725 next = (vm_page_t) m->pageq.next;
5726 discard = FALSE;
b0d623f7 5727 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
db609669 5728 && hibernate_consider_discard(m, preflight))
b0d623f7 5729 {
db609669 5730 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
b0d623f7
A
5731 if (m->dirty)
5732 count_discard_purgeable++;
5733 else
5734 count_discard_active++;
39236c6e 5735 discard = discard_all;
b0d623f7
A
5736 }
5737 else
5738 count_active++;
5739 count_wire--;
db609669 5740 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
39236c6e
A
5741 if (discard) hibernate_discard_page(m);
5742 m = next;
5743 }
5744
5745 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
5746 {
5747 count_compressor++;
5748 count_wire--;
5749 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5750 }
5751
5752 if (preflight == FALSE && discard_all == TRUE) {
5753 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5754
5755 HIBLOG("hibernate_teardown started\n");
5756 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
5757 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
5758
5759 pages -= count_discard_vm_struct_pages;
5760 count_wire -= count_discard_vm_struct_pages;
5761
5762 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
5763
5764 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
b0d623f7
A
5765 }
5766
db609669
A
5767 if (!preflight) {
5768 // pull wired from hibernate_bitmap
5769 bitmap = &page_list->bank_bitmap[0];
5770 bitmap_wired = &page_list_wired->bank_bitmap[0];
5771 for (bank = 0; bank < page_list->bank_count; bank++)
5772 {
5773 for (i = 0; i < bitmap->bitmapwords; i++)
5774 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5775 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5776 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5777 }
b0d623f7
A
5778 }
5779
5780 // machine dependent adjustments
db609669 5781 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
b0d623f7 5782
db609669
A
5783 if (!preflight) {
5784 hibernate_stats.cd_count_wire = count_wire;
39236c6e
A
5785 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
5786 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
db609669 5787 }
0b4c1975 5788
b0d623f7
A
5789 clock_get_uptime(&end);
5790 absolutetime_to_nanoseconds(end - start, &nsec);
5791 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5792
39236c6e
A
5793 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
5794 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
5795 discard_all ? "did" : "could",
316670eb 5796 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
b0d623f7 5797
316670eb
A
5798 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
5799
39236c6e
A
5800 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
5801
316670eb 5802#if DEBUG
39236c6e
A
5803 if (!preflight)
5804 {
316670eb
A
5805 if (vm_page_local_q) {
5806 for (i = 0; i < vm_page_local_q_count; i++) {
5807 struct vpl *lq;
5808 lq = &vm_page_local_q[i].vpl_un.vpl;
5809 VPL_UNLOCK(&lq->vpl_lock);
5810 }
5811 }
5812 vm_page_unlock_queues();
39236c6e 5813 }
316670eb 5814#endif /* DEBUG */
0b4c1975 5815
db609669
A
5816 if (preflight) {
5817 lck_mtx_unlock(&vm_page_queue_free_lock);
5818 vm_page_unlock_queues();
39236c6e 5819 vm_object_unlock(compressor_object);
db609669
A
5820 }
5821
0b4c1975 5822 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
b0d623f7
A
5823}
5824
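/*
 * Walk the anonymous, speculative, inactive, active and cleaned queues and
 * free every page whose bit is set in page_list, tallying the discards per
 * queue.
 */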
5825void
5826hibernate_page_list_discard(hibernate_page_list_t * page_list)
5827{
5828 uint64_t start, end, nsec;
5829 vm_page_t m;
5830 vm_page_t next;
5831 uint32_t i;
5832 uint32_t count_discard_active = 0;
5833 uint32_t count_discard_inactive = 0;
5834 uint32_t count_discard_purgeable = 0;
316670eb 5835 uint32_t count_discard_cleaned = 0;
b0d623f7
A
5836 uint32_t count_discard_speculative = 0;
5837
39236c6e 5838
316670eb
A
5839#if DEBUG
5840 vm_page_lock_queues();
5841 if (vm_page_local_q) {
5842 for (i = 0; i < vm_page_local_q_count; i++) {
5843 struct vpl *lq;
5844 lq = &vm_page_local_q[i].vpl_un.vpl;
5845 VPL_LOCK(&lq->vpl_lock);
5846 }
5847 }
5848#endif /* DEBUG */
5849
b0d623f7
A
5850 clock_get_uptime(&start);
5851
316670eb
A
5852 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5853 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
b0d623f7
A
5854 {
5855 next = (vm_page_t) m->pageq.next;
5856 if (hibernate_page_bittst(page_list, m->phys_page))
5857 {
5858 if (m->dirty)
5859 count_discard_purgeable++;
5860 else
5861 count_discard_inactive++;
5862 hibernate_discard_page(m);
5863 }
5864 m = next;
5865 }
5866
5867 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5868 {
5869 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5870 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5871 {
5872 next = (vm_page_t) m->pageq.next;
5873 if (hibernate_page_bittst(page_list, m->phys_page))
5874 {
5875 count_discard_speculative++;
5876 hibernate_discard_page(m);
5877 }
5878 m = next;
5879 }
5880 }
5881
5882 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5883 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5884 {
5885 next = (vm_page_t) m->pageq.next;
5886 if (hibernate_page_bittst(page_list, m->phys_page))
5887 {
5888 if (m->dirty)
5889 count_discard_purgeable++;
5890 else
5891 count_discard_inactive++;
5892 hibernate_discard_page(m);
5893 }
5894 m = next;
5895 }
5896
5897 m = (vm_page_t) queue_first(&vm_page_queue_active);
5898 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5899 {
5900 next = (vm_page_t) m->pageq.next;
5901 if (hibernate_page_bittst(page_list, m->phys_page))
5902 {
5903 if (m->dirty)
5904 count_discard_purgeable++;
5905 else
5906 count_discard_active++;
5907 hibernate_discard_page(m);
5908 }
5909 m = next;
5910 }
5911
316670eb
A
5912 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5913 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5914 {
5915 next = (vm_page_t) m->pageq.next;
5916 if (hibernate_page_bittst(page_list, m->phys_page))
5917 {
5918 if (m->dirty)
5919 count_discard_purgeable++;
5920 else
5921 count_discard_cleaned++;
5922 hibernate_discard_page(m);
5923 }
5924 m = next;
5925 }
5926
5927#if DEBUG
5928 if (vm_page_local_q) {
5929 for (i = 0; i < vm_page_local_q_count; i++) {
5930 struct vpl *lq;
5931 lq = &vm_page_local_q[i].vpl_un.vpl;
5932 VPL_UNLOCK(&lq->vpl_lock);
5933 }
5934 }
5935 vm_page_unlock_queues();
5936#endif /* DEBUG */
5937
b0d623f7
A
5938 clock_get_uptime(&end);
5939 absolutetime_to_nanoseconds(end - start, &nsec);
316670eb 5940 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
b0d623f7 5941 nsec / 1000000ULL,
316670eb 5942 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
b0d623f7
A
5943}
5944
39236c6e
A
5945boolean_t hibernate_paddr_map_inited = FALSE;
5946boolean_t hibernate_rebuild_needed = FALSE;
5947unsigned int hibernate_teardown_last_valid_compact_indx = -1;
5948vm_page_t hibernate_rebuild_hash_list = NULL;
5949
5950unsigned int hibernate_teardown_found_tabled_pages = 0;
5951unsigned int hibernate_teardown_found_created_pages = 0;
5952unsigned int hibernate_teardown_found_free_pages = 0;
5953unsigned int hibernate_teardown_vm_page_free_count;
5954
5955
5956struct ppnum_mapping {
5957 struct ppnum_mapping *ppnm_next;
5958 ppnum_t ppnm_base_paddr;
5959 unsigned int ppnm_sindx;
5960 unsigned int ppnm_eindx;
5961};
5962
5963struct ppnum_mapping *ppnm_head;
5964struct ppnum_mapping *ppnm_last_found = NULL;
5965
5966
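/*
 * Build, once, a list of ppnum_mapping runs describing the physically
 * contiguous stretches of the vm_pages array, so hibernate_lookup_paddr()
 * can translate a vm_pages index back into a physical page number.
 */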
5967void
5968hibernate_create_paddr_map()
5969{
5970 unsigned int i;
5971 ppnum_t next_ppnum_in_run = 0;
5972 struct ppnum_mapping *ppnm = NULL;
5973
5974 if (hibernate_paddr_map_inited == FALSE) {
5975
5976 for (i = 0; i < vm_pages_count; i++) {
5977
5978 if (ppnm)
5979 ppnm->ppnm_eindx = i;
5980
5981 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
5982
5983 ppnm = kalloc(sizeof(struct ppnum_mapping));
5984
5985 ppnm->ppnm_next = ppnm_head;
5986 ppnm_head = ppnm;
5987
5988 ppnm->ppnm_sindx = i;
5989 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
5990 }
5991 next_ppnum_in_run = vm_pages[i].phys_page + 1;
5992 }
5993 ppnm->ppnm_eindx++;
5994
5995 hibernate_paddr_map_inited = TRUE;
5996 }
5997}
5998
5999ppnum_t
6000hibernate_lookup_paddr(unsigned int indx)
6001{
6002 struct ppnum_mapping *ppnm = NULL;
6003
6004 ppnm = ppnm_last_found;
6005
6006 if (ppnm) {
6007 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
6008 goto done;
6009 }
6010 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
6011
6012 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
6013 ppnm_last_found = ppnm;
6014 break;
6015 }
6016 }
6017 if (ppnm == NULL)
6018 panic("hibernate_lookup_paddr of %d failed\n", indx);
6019done:
6020 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
6021}
6022
6023
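/*
 * Mark every page fully contained in the kernel virtual range
 * [saddr, eaddr) as not needing to be saved, by setting its bit in both
 * bitmaps; returns the number of pages marked.
 */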
6024uint32_t
6025hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6026{
6027 addr64_t saddr_aligned;
6028 addr64_t eaddr_aligned;
6029 addr64_t addr;
6030 ppnum_t paddr;
6031 unsigned int mark_as_unneeded_pages = 0;
6032
6033 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
6034 eaddr_aligned = eaddr & ~PAGE_MASK_64;
6035
6036 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
6037
6038 paddr = pmap_find_phys(kernel_pmap, addr);
6039
6040 assert(paddr);
6041
6042 hibernate_page_bitset(page_list, TRUE, paddr);
6043 hibernate_page_bitset(page_list_wired, TRUE, paddr);
6044
6045 mark_as_unneeded_pages++;
6046 }
6047 return (mark_as_unneeded_pages);
6048}
6049
6050
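/*
 * Push a page onto the head of its object/offset hash bucket; used when
 * repopulating the buckets after wake.
 */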
6051void
6052hibernate_hash_insert_page(vm_page_t mem)
6053{
6054 vm_page_bucket_t *bucket;
6055 int hash_id;
6056
15129b1c 6057 assert(mem->hashed);
39236c6e
A
6058 assert(mem->object);
6059 assert(mem->offset != (vm_object_offset_t) -1);
6060
6061 /*
6062 * Insert it into the object/offset hash table
6063 */
6064 hash_id = vm_page_hash(mem->object, mem->offset);
6065 bucket = &vm_page_buckets[hash_id];
6066
6067 mem->next = bucket->pages;
6068 bucket->pages = mem;
6069}
6070
6071
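/*
 * Reinitialize vm_pages[sindx..eindx) as free pages, putting each one back
 * on its color's free queue and bumping vm_page_free_count.
 */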
6072void
6073hibernate_free_range(int sindx, int eindx)
6074{
6075 vm_page_t mem;
6076 unsigned int color;
6077
6078 while (sindx < eindx) {
6079 mem = &vm_pages[sindx];
6080
6081 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6082
6083 mem->lopage = FALSE;
6084 mem->free = TRUE;
6085
6086 color = mem->phys_page & vm_color_mask;
6087 queue_enter_first(&vm_page_queue_free[color],
6088 mem,
6089 vm_page_t,
6090 pageq);
6091 vm_page_free_count++;
6092
6093 sindx++;
6094 }
6095}
6096
6097
6098extern void hibernate_rebuild_pmap_structs(void);
6099
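/*
 * Undo hibernate_teardown_vm_structs() on wake: walk the compacted entries
 * backwards, copy each vm_page_t back to its original slot (recorded in
 * "next"), re-enter hashed pages into the buckets, rebuild the free ranges
 * in between, and finally re-hash the fictitious pages saved on
 * hibernate_rebuild_hash_list.
 */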
6100void
6101hibernate_rebuild_vm_structs(void)
6102{
6103 int cindx, sindx, eindx;
6104 vm_page_t mem, tmem, mem_next;
6105 AbsoluteTime startTime, endTime;
6106 uint64_t nsec;
6107
6108 if (hibernate_rebuild_needed == FALSE)
6109 return;
6110
6111 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6112 HIBLOG("hibernate_rebuild started\n");
6113
6114 clock_get_uptime(&startTime);
6115
6116 hibernate_rebuild_pmap_structs();
6117
6118 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6119 eindx = vm_pages_count;
6120
6121 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6122
6123 mem = &vm_pages[cindx];
6124 /*
6125 * hibernate_teardown_vm_structs leaves, in "next", the location
6126 * where this vm_page_t must end up.
6127 */
6128 tmem = mem->next;
6129 mem->next = NULL;
6130
6131 sindx = (int)(tmem - &vm_pages[0]);
6132
6133 if (mem != tmem) {
6134 /*
6135 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6136 * so move it back to its real location
6137 */
6138 *tmem = *mem;
6139 mem = tmem;
6140 }
15129b1c 6141 if (mem->hashed)
39236c6e
A
6142 hibernate_hash_insert_page(mem);
6143 /*
6144 * the 'hole' between this vm_page_t and the previous
6145 * vm_page_t we moved needs to be initialized as
6146 * a range of free vm_page_t's
6147 */
6148 hibernate_free_range(sindx + 1, eindx);
6149
6150 eindx = sindx;
6151 }
6152 if (sindx)
6153 hibernate_free_range(0, sindx);
6154
6155 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6156
6157 /*
15129b1c 6158 * process the list of vm_page_t's that were entered in the hash,
39236c6e
A
6159 * but were not located in the vm_pages array... these are
6160 * vm_page_t's that were created on the fly (i.e. fictitious)
6161 */
6162 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6163 mem_next = mem->next;
6164
6165 mem->next = NULL;
6166 hibernate_hash_insert_page(mem);
6167 }
6168 hibernate_rebuild_hash_list = NULL;
6169
6170 clock_get_uptime(&endTime);
6171 SUB_ABSOLUTETIME(&endTime, &startTime);
6172 absolutetime_to_nanoseconds(endTime, &nsec);
6173
6174 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6175
6176 hibernate_rebuild_needed = FALSE;
6177
6178 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6179}
6180
6181
6182extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6183
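/*
 * Invoked from hibernate_page_list_setall() when discarding: pull free pages
 * off their queues, compact the in-use vm_page_t's toward the front of
 * vm_pages (recording each page's original slot in "next"), and mark the
 * hash buckets, the vacated tail of vm_pages and any spare pmap data as
 * unneeded; returns the number of pages so marked.
 */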
6184uint32_t
6185hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6186{
6187 unsigned int i;
6188 unsigned int compact_target_indx;
6189 vm_page_t mem, mem_next;
6190 vm_page_bucket_t *bucket;
6191 unsigned int mark_as_unneeded_pages = 0;
6192 unsigned int unneeded_vm_page_bucket_pages = 0;
6193 unsigned int unneeded_vm_pages_pages = 0;
6194 unsigned int unneeded_pmap_pages = 0;
6195 addr64_t start_of_unneeded = 0;
6196 addr64_t end_of_unneeded = 0;
6197
6198
6199 if (hibernate_should_abort())
6200 return (0);
6201
6202 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6203 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6204 vm_page_cleaned_count, compressor_object->resident_page_count);
6205
6206 for (i = 0; i < vm_page_bucket_count; i++) {
6207
6208 bucket = &vm_page_buckets[i];
6209
6210 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem_next) {
15129b1c 6211 assert(mem->hashed);
39236c6e
A
6212
6213 mem_next = mem->next;
6214
6215 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6216 mem->next = hibernate_rebuild_hash_list;
6217 hibernate_rebuild_hash_list = mem;
6218 }
6219 }
6220 }
6221 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6222 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6223
6224 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6225
6226 compact_target_indx = 0;
6227
6228 for (i = 0; i < vm_pages_count; i++) {
6229
6230 mem = &vm_pages[i];
6231
6232 if (mem->free) {
6233 unsigned int color;
6234
6235 assert(mem->busy);
6236 assert(!mem->lopage);
6237
6238 color = mem->phys_page & vm_color_mask;
6239
6240 queue_remove(&vm_page_queue_free[color],
6241 mem,
6242 vm_page_t,
6243 pageq);
6244 mem->pageq.next = NULL;
6245 mem->pageq.prev = NULL;
6246
6247 vm_page_free_count--;
6248
6249 hibernate_teardown_found_free_pages++;
6250
6251 if ( !vm_pages[compact_target_indx].free)
6252 compact_target_indx = i;
6253 } else {
6254 /*
6255 * record this vm_page_t's original location;
6256 * we need this even if it doesn't get moved,
6257 * as an indicator to the rebuild function that
6258 * it doesn't have to be moved back
6259 */
6260 mem->next = mem;
6261
6262 if (vm_pages[compact_target_indx].free) {
6263 /*
6264 * we've got a hole to fill, so
6265 * move this vm_page_t to its new home
6266 */
6267 vm_pages[compact_target_indx] = *mem;
6268 mem->free = TRUE;
6269
6270 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6271 compact_target_indx++;
6272 } else
6273 hibernate_teardown_last_valid_compact_indx = i;
6274 }
6275 }
6276 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6277 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6278 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6279
6280 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6281
6282 if (start_of_unneeded) {
6283 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6284 mark_as_unneeded_pages += unneeded_pmap_pages;
6285 }
6286 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6287
6288 hibernate_rebuild_needed = TRUE;
6289
6290 return (mark_as_unneeded_pages);
6291}
6292
6293
d1ecb069
A
6294#endif /* HIBERNATION */
6295
b0d623f7 6296/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
1c79356b
A
6297
6298#include <mach_vm_debug.h>
6299#if MACH_VM_DEBUG
6300
6301#include <mach_debug/hash_info.h>
6302#include <vm/vm_debug.h>
6303
6304/*
6305 * Routine: vm_page_info
6306 * Purpose:
6307 * Return information about the global VP table.
6308 * Fills the buffer with as much information as possible
6309 * and returns the desired size of the buffer.
6310 * Conditions:
6311 * Nothing locked. The caller should provide
6312 * possibly-pageable memory.
6313 */
6314
6315unsigned int
6316vm_page_info(
6317 hash_info_bucket_t *info,
6318 unsigned int count)
6319{
91447636 6320 unsigned int i;
b0d623f7 6321 lck_spin_t *bucket_lock;
1c79356b
A
6322
6323 if (vm_page_bucket_count < count)
6324 count = vm_page_bucket_count;
6325
6326 for (i = 0; i < count; i++) {
6327 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6328 unsigned int bucket_count = 0;
6329 vm_page_t m;
6330
b0d623f7
A
6331 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6332 lck_spin_lock(bucket_lock);
6333
1c79356b
A
6334 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
6335 bucket_count++;
b0d623f7
A
6336
6337 lck_spin_unlock(bucket_lock);
1c79356b
A
6338
6339 /* don't touch pageable memory while holding locks */
6340 info[i].hib_count = bucket_count;
6341 }
6342
6343 return vm_page_bucket_count;
6344}
6345#endif /* MACH_VM_DEBUG */
15129b1c
A
6346
6347#if VM_PAGE_BUCKETS_CHECK
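/*
 * Debug sweep of the vm_page hash: every page on a bucket's chain must be
 * marked "hashed" and must hash back to the bucket holding it; also checks
 * the fake-bucket guard pattern and that no hibernation rebuild is pending.
 */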
6348void
6349vm_page_buckets_check(void)
6350{
6351 unsigned int i;
6352 vm_page_t p;
6353 unsigned int p_hash;
6354 vm_page_bucket_t *bucket;
6355 lck_spin_t *bucket_lock;
6356
6357 if (!vm_page_buckets_check_ready) {
6358 return;
6359 }
6360
6361#if HIBERNATION
6362 if (hibernate_rebuild_needed ||
6363 hibernate_rebuild_hash_list) {
6364 panic("BUCKET_CHECK: hibernation in progress: "
6365 "rebuild_needed=%d rebuild_hash_list=%p\n",
6366 hibernate_rebuild_needed,
6367 hibernate_rebuild_hash_list);
6368 }
6369#endif /* HIBERNATION */
6370
6371#if VM_PAGE_FAKE_BUCKETS
6372 char *cp;
6373 for (cp = (char *) vm_page_fake_buckets_start;
6374 cp < (char *) vm_page_fake_buckets_end;
6375 cp++) {
6376 if (*cp != 0x5a) {
6377 panic("BUCKET_CHECK: corruption at %p in fake buckets "
6378 "[0x%llx:0x%llx]\n",
6379 cp,
6380 vm_page_fake_buckets_start,
6381 vm_page_fake_buckets_end);
6382 }
6383 }
6384#endif /* VM_PAGE_FAKE_BUCKETS */
6385
6386 for (i = 0; i < vm_page_bucket_count; i++) {
6387 bucket = &vm_page_buckets[i];
6388 if (bucket->pages == VM_PAGE_NULL) {
6389 continue;
6390 }
6391
6392 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6393 lck_spin_lock(bucket_lock);
6394 p = bucket->pages;
6395 while (p != VM_PAGE_NULL) {
6396 if (!p->hashed) {
6397 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
6398 "hash %d in bucket %d at %p "
6399 "is not hashed\n",
6400 p, p->object, p->offset,
6401 p_hash, i, bucket);
6402 }
6403 p_hash = vm_page_hash(p->object, p->offset);
6404 if (p_hash != i) {
6405 panic("BUCKET_CHECK: corruption in bucket %d "
6406 "at %p: page %p object %p offset 0x%llx "
6407 "hash %d\n",
6408 i, bucket, p, p->object, p->offset,
6409 p_hash);
6410 }
6411 p = p->next;
6412 }
6413 lck_spin_unlock(bucket_lock);
6414 }
6415
6416// printf("BUCKET_CHECK: checked buckets\n");
6417}
6418#endif /* VM_PAGE_BUCKETS_CHECK */