[apple/xnu.git] / osfmk / vm / vm_resident.c (xnu-2050.18.24)
1c79356b 1/*
b0d623f7 2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
91447636 65#include <debug.h>
2d21ac55 66#include <libkern/OSAtomic.h>
91447636 67
9bccf70c 68#include <mach/clock_types.h>
1c79356b
A
69#include <mach/vm_prot.h>
70#include <mach/vm_statistics.h>
2d21ac55 71#include <mach/sdt.h>
1c79356b
A
72#include <kern/counters.h>
73#include <kern/sched_prim.h>
74#include <kern/task.h>
75#include <kern/thread.h>
b0d623f7 76#include <kern/kalloc.h>
1c79356b
A
77#include <kern/zalloc.h>
78#include <kern/xpr.h>
79#include <vm/pmap.h>
80#include <vm/vm_init.h>
81#include <vm/vm_map.h>
82#include <vm/vm_page.h>
83#include <vm/vm_pageout.h>
84#include <vm/vm_kern.h> /* kernel_memory_allocate() */
85#include <kern/misc_protos.h>
86#include <zone_debug.h>
87#include <vm/cpm.h>
6d2010ae 88#include <pexpert/pexpert.h>
55e303ae 89
91447636 90#include <vm/vm_protos.h>
2d21ac55
A
91#include <vm/memory_object.h>
92#include <vm/vm_purgeable_internal.h>
93
b0d623f7
A
94#include <IOKit/IOHibernatePrivate.h>
95
b0d623f7
A
96#include <sys/kdebug.h>
97
316670eb 98boolean_t hibernate_cleaning_in_progress = FALSE;
b0d623f7
A
99boolean_t vm_page_free_verify = TRUE;
100
6d2010ae
A
101uint32_t vm_lopage_free_count = 0;
102uint32_t vm_lopage_free_limit = 0;
103uint32_t vm_lopage_lowater = 0;
0b4c1975
A
104boolean_t vm_lopage_refill = FALSE;
105boolean_t vm_lopage_needed = FALSE;
106
b0d623f7
A
107lck_mtx_ext_t vm_page_queue_lock_ext;
108lck_mtx_ext_t vm_page_queue_free_lock_ext;
109lck_mtx_ext_t vm_purgeable_queue_lock_ext;
2d21ac55 110
0b4c1975
A
111int speculative_age_index = 0;
112int speculative_steal_index = 0;
2d21ac55
A
113struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
114
0b4e3aa0 115
b0d623f7
A
116__private_extern__ void vm_page_init_lck_grp(void);
117
6d2010ae
A
118static void vm_page_free_prepare(vm_page_t page);
119static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
120
b0d623f7
A
121
122
123
1c79356b
A
124/*
125 * Associated with page of user-allocatable memory is a
126 * page structure.
127 */
128
129/*
130 * These variables record the values returned by vm_page_bootstrap,
131 * for debugging purposes. The implementation of pmap_steal_memory
132 * and pmap_startup here also uses them internally.
133 */
134
135vm_offset_t virtual_space_start;
136vm_offset_t virtual_space_end;
7ddcb079 137uint32_t vm_page_pages;
1c79356b
A
138
139/*
140 * The vm_page_lookup() routine, which provides for fast
141 * (virtual memory object, offset) to page lookup, employs
142 * the following hash table. The vm_page_{insert,remove}
143 * routines install and remove associations in the table.
144 * [This table is often called the virtual-to-physical,
145 * or VP, table.]
146 */
147typedef struct {
148 vm_page_t pages;
149#if MACH_PAGE_HASH_STATS
150 int cur_count; /* current count */
151 int hi_count; /* high water mark */
152#endif /* MACH_PAGE_HASH_STATS */
153} vm_page_bucket_t;
154
b0d623f7
A
155
156#define BUCKETS_PER_LOCK 16
157
1c79356b
A
158vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
159unsigned int vm_page_bucket_count = 0; /* How big is array? */
160unsigned int vm_page_hash_mask; /* Mask for hash function */
161unsigned int vm_page_hash_shift; /* Shift for hash function */
2d21ac55 162uint32_t vm_page_bucket_hash; /* Basic bucket hash */
b0d623f7
A
163unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
164
165lck_spin_t *vm_page_bucket_locks;
1c79356b 166
91447636 167
1c79356b
A
168#if MACH_PAGE_HASH_STATS
169/* This routine is only for debug. It is intended to be called by
170 * hand by a developer using a kernel debugger. This routine prints
171 * out vm_page_hash table statistics to the kernel debug console.
172 */
173void
174hash_debug(void)
175{
176 int i;
177 int numbuckets = 0;
178 int highsum = 0;
179 int maxdepth = 0;
180
181 for (i = 0; i < vm_page_bucket_count; i++) {
182 if (vm_page_buckets[i].hi_count) {
183 numbuckets++;
184 highsum += vm_page_buckets[i].hi_count;
185 if (vm_page_buckets[i].hi_count > maxdepth)
186 maxdepth = vm_page_buckets[i].hi_count;
187 }
188 }
189 printf("Total number of buckets: %d\n", vm_page_bucket_count);
190 printf("Number used buckets: %d = %d%%\n",
191 numbuckets, 100*numbuckets/vm_page_bucket_count);
192 printf("Number unused buckets: %d = %d%%\n",
193 vm_page_bucket_count - numbuckets,
194 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
195 printf("Sum of bucket max depth: %d\n", highsum);
196 printf("Average bucket depth: %d.%2d\n",
197 highsum/vm_page_bucket_count,
198 highsum%vm_page_bucket_count);
199 printf("Maximum bucket depth: %d\n", maxdepth);
200}
201#endif /* MACH_PAGE_HASH_STATS */
202
203/*
204 * The virtual page size is currently implemented as a runtime
205 * variable, but is constant once initialized using vm_set_page_size.
206 * This initialization must be done in the machine-dependent
207 * bootstrap sequence, before calling other machine-independent
208 * initializations.
209 *
210 * All references to the virtual page size outside this
211 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
212 * constants.
213 */
55e303ae
A
214vm_size_t page_size = PAGE_SIZE;
215vm_size_t page_mask = PAGE_MASK;
2d21ac55 216int page_shift = PAGE_SHIFT;
1c79356b
A
217
218/*
219 * Resident page structures are initialized from
220 * a template (see vm_page_alloc).
221 *
222 * When adding a new field to the virtual memory
223 * object structure, be sure to add initialization
224 * (see vm_page_bootstrap).
225 */
226struct vm_page vm_page_template;
227
2d21ac55
A
228vm_page_t vm_pages = VM_PAGE_NULL;
229unsigned int vm_pages_count = 0;
0b4c1975 230ppnum_t vm_page_lowest = 0;
2d21ac55 231
1c79356b
A
232/*
233 * Resident pages that represent real memory
2d21ac55
A
234 * are allocated from a set of free lists,
235 * one per color.
1c79356b 236 */
2d21ac55
A
237unsigned int vm_colors;
238unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
239unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
240queue_head_t vm_page_queue_free[MAX_COLORS];
1c79356b 241unsigned int vm_page_free_wanted;
2d21ac55 242unsigned int vm_page_free_wanted_privileged;
91447636
A
243unsigned int vm_page_free_count;
244unsigned int vm_page_fictitious_count;
1c79356b
A
245
246unsigned int vm_page_free_count_minimum; /* debugging */
247
248/*
249 * Occasionally, the virtual memory system uses
250 * resident page structures that do not refer to
251 * real pages, for example to leave a page with
252 * important state information in the VP table.
253 *
254 * These page structures are allocated the way
255 * most other kernel structures are.
256 */
257zone_t vm_page_zone;
b0d623f7
A
258vm_locks_array_t vm_page_locks;
259decl_lck_mtx_data(,vm_page_alloc_lock)
316670eb
A
260lck_mtx_ext_t vm_page_alloc_lock_ext;
261
9bccf70c 262unsigned int io_throttle_zero_fill;
1c79356b 263
b0d623f7
A
264unsigned int vm_page_local_q_count = 0;
265unsigned int vm_page_local_q_soft_limit = 250;
266unsigned int vm_page_local_q_hard_limit = 500;
267struct vplq *vm_page_local_q = NULL;
268
316670eb
A
269/* N.B. Guard and fictitious pages must not
270 * be assigned a zero phys_page value.
271 */
1c79356b
A
272/*
273 * Fictitious pages don't have a physical address,
55e303ae 274 * but we must initialize phys_page to something.
1c79356b
A
275 * For debugging, this should be a strange value
276 * that the pmap module can recognize in assertions.
277 */
b0d623f7 278ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
1c79356b 279
2d21ac55
A
280/*
281 * Guard pages are not accessible so they don't
282 * need a physical address, but we need to enter
283 * one in the pmap.
284 * Let's make it recognizable and make sure that
285 * we don't use a real physical page with that
286 * physical address.
287 */
b0d623f7 288ppnum_t vm_page_guard_addr = (ppnum_t) -2;
2d21ac55 289
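/*
 * Illustrative note (added commentary, not original source): because the
 * two sentinels above are deliberately impossible physical page numbers,
 * code later in this file can tell fictitious and guard pages apart from
 * real ones by a simple comparison, e.g. the check made when a fictitious
 * page is released back to its zone:
 *
 *	assert(m->phys_page == vm_page_fictitious_addr ||
 *	       m->phys_page == vm_page_guard_addr);
 */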
1c79356b
A
290/*
291 * Resident page structures are also chained on
292 * queues that are used by the page replacement
293 * system (pageout daemon). These queues are
294 * defined here, but are shared by the pageout
9bccf70c
A
295 * module. The inactive queue is broken into
296 * inactive and zf for convenience as the
 297 * pageout daemon often assigns a higher
298 * affinity to zf pages
1c79356b
A
299 */
300queue_head_t vm_page_queue_active;
301queue_head_t vm_page_queue_inactive;
316670eb 302queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
b0d623f7 303queue_head_t vm_page_queue_throttled;
2d21ac55 304
91447636
A
305unsigned int vm_page_active_count;
306unsigned int vm_page_inactive_count;
316670eb 307unsigned int vm_page_anonymous_count;
2d21ac55
A
308unsigned int vm_page_throttled_count;
309unsigned int vm_page_speculative_count;
91447636 310unsigned int vm_page_wire_count;
0b4c1975 311unsigned int vm_page_wire_count_initial;
91447636
A
312unsigned int vm_page_gobble_count = 0;
313unsigned int vm_page_wire_count_warning = 0;
314unsigned int vm_page_gobble_count_warning = 0;
315
316unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
b0d623f7 317unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
91447636 318uint64_t vm_page_purged_count = 0; /* total count of purged pages */
1c79356b 319
b0d623f7 320#if DEVELOPMENT || DEBUG
2d21ac55
A
321unsigned int vm_page_speculative_recreated = 0;
322unsigned int vm_page_speculative_created = 0;
323unsigned int vm_page_speculative_used = 0;
b0d623f7 324#endif
2d21ac55 325
316670eb
A
326queue_head_t vm_page_queue_cleaned;
327
328unsigned int vm_page_cleaned_count = 0;
329unsigned int vm_pageout_enqueued_cleaned = 0;
330
0c530ab8 331uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
0b4c1975 332ppnum_t max_valid_low_ppnum = 0xffffffff;
0c530ab8
A
333
334
1c79356b
A
335/*
336 * Several page replacement parameters are also
337 * shared with this module, so that page allocation
338 * (done here in vm_page_alloc) can trigger the
339 * pageout daemon.
340 */
91447636
A
341unsigned int vm_page_free_target = 0;
342unsigned int vm_page_free_min = 0;
b0d623f7
A
343unsigned int vm_page_throttle_limit = 0;
344uint32_t vm_page_creation_throttle = 0;
91447636 345unsigned int vm_page_inactive_target = 0;
316670eb 346unsigned int vm_page_anonymous_min = 0;
2d21ac55 347unsigned int vm_page_inactive_min = 0;
91447636 348unsigned int vm_page_free_reserved = 0;
b0d623f7 349unsigned int vm_page_throttle_count = 0;
1c79356b 350
316670eb 351
1c79356b
A
352/*
353 * The VM system has a couple of heuristics for deciding
354 * that pages are "uninteresting" and should be placed
355 * on the inactive queue as likely candidates for replacement.
356 * These variables let the heuristics be controlled at run-time
357 * to make experimentation easier.
358 */
359
360boolean_t vm_page_deactivate_hint = TRUE;
361
b0d623f7
A
362struct vm_page_stats_reusable vm_page_stats_reusable;
363
1c79356b
A
364/*
365 * vm_set_page_size:
366 *
367 * Sets the page size, perhaps based upon the memory
368 * size. Must be called before any use of page-size
369 * dependent functions.
370 *
371 * Sets page_shift and page_mask from page_size.
372 */
373void
374vm_set_page_size(void)
375{
1c79356b
A
376 page_mask = page_size - 1;
377
378 if ((page_mask & page_size) != 0)
379 panic("vm_set_page_size: page size not a power of two");
380
381 for (page_shift = 0; ; page_shift++)
91447636 382 if ((1U << page_shift) == page_size)
1c79356b 383 break;
1c79356b
A
384}
385
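/*
 * Worked example (added commentary, not original source): with the usual
 * page_size of 4096 bytes the code above produces
 *
 *	page_mask  = 4096 - 1 = 0xFFF
 *	page_shift = 12          (since 1U << 12 == 4096)
 *
 * and the sanity check passes because (0xFFF & 0x1000) == 0; a non
 * power-of-two page_size would leave overlapping bits set and panic.
 */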
2d21ac55
A
386
 387/* Called once during startup, once the cache geometry is known.
388 */
389static void
390vm_page_set_colors( void )
391{
392 unsigned int n, override;
393
593a1d5f 394 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
2d21ac55
A
395 n = override;
396 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
397 n = vm_cache_geometry_colors;
398 else n = DEFAULT_COLORS; /* use default if all else fails */
399
400 if ( n == 0 )
401 n = 1;
402 if ( n > MAX_COLORS )
403 n = MAX_COLORS;
404
405 /* the count must be a power of 2 */
b0d623f7 406 if ( ( n & (n - 1)) != 0 )
2d21ac55
A
407 panic("vm_page_set_colors");
408
409 vm_colors = n;
410 vm_color_mask = n - 1;
411}
412
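/*
 * Sketch (added commentary, not original source): once vm_colors and
 * vm_color_mask are established, a page's free-list color is derived
 * from its physical page number, so the per-color free queues are
 * indexed roughly as follows when pages are released or grabbed:
 *
 *	unsigned int color = mem->phys_page & vm_color_mask;
 *	queue_enter_first(&vm_page_queue_free[color], mem, vm_page_t, pageq);
 */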
413
b0d623f7
A
414lck_grp_t vm_page_lck_grp_free;
415lck_grp_t vm_page_lck_grp_queue;
416lck_grp_t vm_page_lck_grp_local;
417lck_grp_t vm_page_lck_grp_purge;
418lck_grp_t vm_page_lck_grp_alloc;
419lck_grp_t vm_page_lck_grp_bucket;
420lck_grp_attr_t vm_page_lck_grp_attr;
421lck_attr_t vm_page_lck_attr;
422
423
424__private_extern__ void
425vm_page_init_lck_grp(void)
426{
427 /*
 428 * initialize the vm_page lock world
429 */
430 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
431 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
432 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
433 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
434 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
435 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
436 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
437 lck_attr_setdefault(&vm_page_lck_attr);
316670eb 438 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
b0d623f7
A
439}
440
441void
442vm_page_init_local_q()
443{
444 unsigned int num_cpus;
445 unsigned int i;
446 struct vplq *t_local_q;
447
448 num_cpus = ml_get_max_cpus();
449
450 /*
451 * no point in this for a uni-processor system
452 */
453 if (num_cpus >= 2) {
454 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
455
456 for (i = 0; i < num_cpus; i++) {
457 struct vpl *lq;
458
459 lq = &t_local_q[i].vpl_un.vpl;
460 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
461 queue_init(&lq->vpl_queue);
462 lq->vpl_count = 0;
463 }
464 vm_page_local_q_count = num_cpus;
465
466 vm_page_local_q = (struct vplq *)t_local_q;
467 }
468}
469
470
1c79356b
A
471/*
472 * vm_page_bootstrap:
473 *
474 * Initializes the resident memory module.
475 *
476 * Allocates memory for the page cells, and
477 * for the object/offset-to-page hash table headers.
478 * Each page cell is initialized and placed on the free list.
479 * Returns the range of available kernel virtual memory.
480 */
481
482void
483vm_page_bootstrap(
484 vm_offset_t *startp,
485 vm_offset_t *endp)
486{
487 register vm_page_t m;
91447636 488 unsigned int i;
1c79356b
A
489 unsigned int log1;
490 unsigned int log2;
491 unsigned int size;
492
493 /*
494 * Initialize the vm_page template.
495 */
496
497 m = &vm_page_template;
b0d623f7 498 bzero(m, sizeof (*m));
1c79356b 499
91447636
A
500 m->pageq.next = NULL;
501 m->pageq.prev = NULL;
502 m->listq.next = NULL;
503 m->listq.prev = NULL;
b0d623f7 504 m->next = VM_PAGE_NULL;
91447636 505
b0d623f7
A
506 m->object = VM_OBJECT_NULL; /* reset later */
507 m->offset = (vm_object_offset_t) -1; /* reset later */
508
509 m->wire_count = 0;
510 m->local = FALSE;
1c79356b
A
511 m->inactive = FALSE;
512 m->active = FALSE;
b0d623f7
A
513 m->pageout_queue = FALSE;
514 m->speculative = FALSE;
1c79356b
A
515 m->laundry = FALSE;
516 m->free = FALSE;
517 m->reference = FALSE;
b0d623f7
A
518 m->gobbled = FALSE;
519 m->private = FALSE;
520 m->throttled = FALSE;
521 m->__unused_pageq_bits = 0;
522
523 m->phys_page = 0; /* reset later */
1c79356b
A
524
525 m->busy = TRUE;
526 m->wanted = FALSE;
527 m->tabled = FALSE;
528 m->fictitious = FALSE;
b0d623f7
A
529 m->pmapped = FALSE;
530 m->wpmapped = FALSE;
531 m->pageout = FALSE;
1c79356b
A
532 m->absent = FALSE;
533 m->error = FALSE;
534 m->dirty = FALSE;
535 m->cleaning = FALSE;
536 m->precious = FALSE;
537 m->clustered = FALSE;
b0d623f7 538 m->overwriting = FALSE;
1c79356b 539 m->restart = FALSE;
b0d623f7 540 m->unusual = FALSE;
91447636 541 m->encrypted = FALSE;
2d21ac55 542 m->encrypted_cleaning = FALSE;
b0d623f7
A
543 m->cs_validated = FALSE;
544 m->cs_tainted = FALSE;
545 m->no_cache = FALSE;
b0d623f7 546 m->reusable = FALSE;
6d2010ae 547 m->slid = FALSE;
316670eb 548 m->was_dirty = FALSE;
b0d623f7 549 m->__unused_object_bits = 0;
1c79356b 550
1c79356b 551
1c79356b
A
552 /*
553 * Initialize the page queues.
554 */
b0d623f7
A
555 vm_page_init_lck_grp();
556
557 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
558 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
559 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
2d21ac55
A
560
561 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
562 int group;
563
564 purgeable_queues[i].token_q_head = 0;
565 purgeable_queues[i].token_q_tail = 0;
566 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
567 queue_init(&purgeable_queues[i].objq[group]);
568
569 purgeable_queues[i].type = i;
570 purgeable_queues[i].new_pages = 0;
571#if MACH_ASSERT
572 purgeable_queues[i].debug_count_tokens = 0;
573 purgeable_queues[i].debug_count_objects = 0;
574#endif
575 };
576
577 for (i = 0; i < MAX_COLORS; i++ )
578 queue_init(&vm_page_queue_free[i]);
6d2010ae 579
2d21ac55 580 queue_init(&vm_lopage_queue_free);
1c79356b
A
581 queue_init(&vm_page_queue_active);
582 queue_init(&vm_page_queue_inactive);
316670eb 583 queue_init(&vm_page_queue_cleaned);
2d21ac55 584 queue_init(&vm_page_queue_throttled);
316670eb 585 queue_init(&vm_page_queue_anonymous);
1c79356b 586
2d21ac55
A
587 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
588 queue_init(&vm_page_queue_speculative[i].age_q);
589
590 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
591 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
592 }
1c79356b 593 vm_page_free_wanted = 0;
2d21ac55
A
594 vm_page_free_wanted_privileged = 0;
595
596 vm_page_set_colors();
597
1c79356b
A
598
599 /*
600 * Steal memory for the map and zone subsystems.
601 */
1c79356b 602 zone_steal_memory();
316670eb 603 vm_map_steal_memory();
1c79356b
A
604
605 /*
606 * Allocate (and initialize) the virtual-to-physical
607 * table hash buckets.
608 *
609 * The number of buckets should be a power of two to
610 * get a good hash function. The following computation
 611 * chooses the first power of two that is greater than
 612 * or equal to the number of physical pages in the system.
613 */
614
1c79356b
A
615 if (vm_page_bucket_count == 0) {
616 unsigned int npages = pmap_free_pages();
617
618 vm_page_bucket_count = 1;
619 while (vm_page_bucket_count < npages)
620 vm_page_bucket_count <<= 1;
621 }
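	/*
	 * Worked example (added commentary, not original source): with
	 * roughly 3,000,000 free physical pages reported by
	 * pmap_free_pages(), the loop above doubles vm_page_bucket_count
	 * up to 4,194,304 (2^22), the first power of two >= that count;
	 * with BUCKETS_PER_LOCK == 16 the line below then sizes the
	 * bucket lock array at 262,144 spinlocks.
	 */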
b0d623f7 622 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
1c79356b
A
623
624 vm_page_hash_mask = vm_page_bucket_count - 1;
625
626 /*
627 * Calculate object shift value for hashing algorithm:
628 * O = log2(sizeof(struct vm_object))
629 * B = log2(vm_page_bucket_count)
630 * hash shifts the object left by
631 * B/2 - O
632 */
633 size = vm_page_bucket_count;
634 for (log1 = 0; size > 1; log1++)
635 size /= 2;
636 size = sizeof(struct vm_object);
637 for (log2 = 0; size > 1; log2++)
638 size /= 2;
639 vm_page_hash_shift = log1/2 - log2 + 1;
55e303ae
A
640
641 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
642 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
 643 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
1c79356b
A
644
645 if (vm_page_hash_mask & vm_page_bucket_count)
646 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
647
648 vm_page_buckets = (vm_page_bucket_t *)
649 pmap_steal_memory(vm_page_bucket_count *
650 sizeof(vm_page_bucket_t));
651
b0d623f7
A
652 vm_page_bucket_locks = (lck_spin_t *)
653 pmap_steal_memory(vm_page_bucket_lock_count *
654 sizeof(lck_spin_t));
655
1c79356b
A
656 for (i = 0; i < vm_page_bucket_count; i++) {
657 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
658
659 bucket->pages = VM_PAGE_NULL;
660#if MACH_PAGE_HASH_STATS
661 bucket->cur_count = 0;
662 bucket->hi_count = 0;
663#endif /* MACH_PAGE_HASH_STATS */
664 }
665
b0d623f7
A
666 for (i = 0; i < vm_page_bucket_lock_count; i++)
667 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
668
1c79356b
A
669 /*
670 * Machine-dependent code allocates the resident page table.
671 * It uses vm_page_init to initialize the page frames.
672 * The code also returns to us the virtual space available
673 * to the kernel. We don't trust the pmap module
674 * to get the alignment right.
675 */
676
677 pmap_startup(&virtual_space_start, &virtual_space_end);
91447636
A
678 virtual_space_start = round_page(virtual_space_start);
679 virtual_space_end = trunc_page(virtual_space_end);
1c79356b
A
680
681 *startp = virtual_space_start;
682 *endp = virtual_space_end;
683
684 /*
685 * Compute the initial "wire" count.
686 * Up until now, the pages which have been set aside are not under
687 * the VM system's control, so although they aren't explicitly
688 * wired, they nonetheless can't be moved. At this moment,
689 * all VM managed pages are "free", courtesy of pmap_startup.
690 */
b0d623f7 691 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
0b4c1975
A
692 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
693 vm_page_wire_count_initial = vm_page_wire_count;
1c79356b 694 vm_page_free_count_minimum = vm_page_free_count;
91447636 695
2d21ac55
A
696 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
697 vm_page_free_count, vm_page_wire_count);
698
91447636 699 simple_lock_init(&vm_paging_lock, 0);
1c79356b
A
700}
701
702#ifndef MACHINE_PAGES
703/*
704 * We implement pmap_steal_memory and pmap_startup with the help
705 * of two simpler functions, pmap_virtual_space and pmap_next_page.
706 */
707
91447636 708void *
1c79356b
A
709pmap_steal_memory(
710 vm_size_t size)
711{
55e303ae
A
712 vm_offset_t addr, vaddr;
713 ppnum_t phys_page;
1c79356b
A
714
715 /*
 716 * We round the size up to a multiple of the pointer size.
717 */
718
719 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
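	/*
	 * Example (added commentary, not original source): on LP64,
	 * sizeof (void *) == 8, so a request of 13 bytes is rounded to
	 * (13 + 7) & ~7 == 16 bytes.
	 */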
720
721 /*
722 * If this is the first call to pmap_steal_memory,
 723 * we have to initialize ourselves.
724 */
725
726 if (virtual_space_start == virtual_space_end) {
727 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
728
729 /*
730 * The initial values must be aligned properly, and
731 * we don't trust the pmap module to do it right.
732 */
733
91447636
A
734 virtual_space_start = round_page(virtual_space_start);
735 virtual_space_end = trunc_page(virtual_space_end);
1c79356b
A
736 }
737
738 /*
739 * Allocate virtual memory for this request.
740 */
741
742 addr = virtual_space_start;
743 virtual_space_start += size;
744
6d2010ae 745 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1c79356b
A
746
747 /*
748 * Allocate and map physical pages to back new virtual pages.
749 */
750
91447636 751 for (vaddr = round_page(addr);
1c79356b
A
752 vaddr < addr + size;
753 vaddr += PAGE_SIZE) {
b0d623f7 754
0b4c1975 755 if (!pmap_next_page_hi(&phys_page))
1c79356b
A
756 panic("pmap_steal_memory");
757
758 /*
759 * XXX Logically, these mappings should be wired,
760 * but some pmap modules barf if they are.
761 */
b0d623f7
A
762#if defined(__LP64__)
763 pmap_pre_expand(kernel_pmap, vaddr);
764#endif
1c79356b 765
55e303ae 766 pmap_enter(kernel_pmap, vaddr, phys_page,
316670eb 767 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
9bccf70c 768 VM_WIMG_USE_DEFAULT, FALSE);
1c79356b
A
769 /*
770 * Account for newly stolen memory
771 */
772 vm_page_wire_count++;
773
774 }
775
91447636 776 return (void *) addr;
1c79356b
A
777}
778
779void
780pmap_startup(
781 vm_offset_t *startp,
782 vm_offset_t *endp)
783{
55e303ae 784 unsigned int i, npages, pages_initialized, fill, fillval;
55e303ae
A
785 ppnum_t phys_page;
786 addr64_t tmpaddr;
1c79356b
A
787
788 /*
789 * We calculate how many page frames we will have
790 * and then allocate the page structures in one chunk.
791 */
792
55e303ae 793 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
b0d623f7 794 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
2d21ac55 795 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
1c79356b 796
2d21ac55 797 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
1c79356b
A
798
799 /*
800 * Initialize the page frames.
801 */
1c79356b 802 for (i = 0, pages_initialized = 0; i < npages; i++) {
55e303ae 803 if (!pmap_next_page(&phys_page))
1c79356b 804 break;
0b4c1975
A
805 if (pages_initialized == 0 || phys_page < vm_page_lowest)
806 vm_page_lowest = phys_page;
1c79356b 807
0b4c1975 808 vm_page_init(&vm_pages[i], phys_page, FALSE);
1c79356b
A
809 vm_page_pages++;
810 pages_initialized++;
811 }
2d21ac55 812 vm_pages_count = pages_initialized;
1c79356b 813
0c530ab8
A
814 /*
815 * Check if we want to initialize pages to a known value
816 */
817 fill = 0; /* Assume no fill */
593a1d5f 818 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
316670eb
A
819#if DEBUG
820 /* This slows down booting the DEBUG kernel, particularly on
821 * large memory systems, but is worthwhile in deterministically
822 * trapping uninitialized memory usage.
823 */
824 if (fill == 0) {
825 fill = 1;
826 fillval = 0xDEB8F177;
827 }
828#endif
829 if (fill)
830 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
0c530ab8
A
831 // -debug code remove
832 if (2 == vm_himemory_mode) {
833 // free low -> high so high is preferred
0b4c1975 834 for (i = 1; i <= pages_initialized; i++) {
2d21ac55
A
 835 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
836 vm_page_release(&vm_pages[i - 1]);
0c530ab8
A
837 }
838 }
839 else
840 // debug code remove-
841
1c79356b
A
842 /*
843 * Release pages in reverse order so that physical pages
844 * initially get allocated in ascending addresses. This keeps
845 * the devices (which must address physical memory) happy if
846 * they require several consecutive pages.
847 */
0b4c1975 848 for (i = pages_initialized; i > 0; i--) {
2d21ac55
A
 849 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
850 vm_page_release(&vm_pages[i - 1]);
1c79356b
A
851 }
852
55e303ae
A
853#if 0
854 {
855 vm_page_t xx, xxo, xxl;
2d21ac55 856 int i, j, k, l;
55e303ae
A
857
858 j = 0; /* (BRINGUP) */
859 xxl = 0;
860
2d21ac55
A
861 for( i = 0; i < vm_colors; i++ ) {
862 queue_iterate(&vm_page_queue_free[i],
863 xx,
864 vm_page_t,
865 pageq) { /* BRINGUP */
866 j++; /* (BRINGUP) */
867 if(j > vm_page_free_count) { /* (BRINGUP) */
868 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
55e303ae 869 }
2d21ac55
A
870
871 l = vm_page_free_count - j; /* (BRINGUP) */
872 k = 0; /* (BRINGUP) */
873
874 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
875
876 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
877 k++;
878 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
879 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
880 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
881 }
882 }
883
884 xxl = xx;
55e303ae
A
885 }
886 }
887
888 if(j != vm_page_free_count) { /* (BRINGUP) */
889 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
890 }
891 }
892#endif
893
894
1c79356b
A
895 /*
896 * We have to re-align virtual_space_start,
897 * because pmap_steal_memory has been using it.
898 */
899
b0d623f7 900 virtual_space_start = round_page(virtual_space_start);
1c79356b
A
901
902 *startp = virtual_space_start;
903 *endp = virtual_space_end;
904}
905#endif /* MACHINE_PAGES */
906
907/*
908 * Routine: vm_page_module_init
909 * Purpose:
910 * Second initialization pass, to be done after
911 * the basic VM system is ready.
912 */
913void
914vm_page_module_init(void)
915{
916 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
917 0, PAGE_SIZE, "vm pages");
918
919#if ZONE_DEBUG
920 zone_debug_disable(vm_page_zone);
921#endif /* ZONE_DEBUG */
922
6d2010ae 923 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1c79356b
A
924 zone_change(vm_page_zone, Z_EXPAND, FALSE);
925 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
926 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
316670eb 927 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1c79356b
A
928 /*
929 * Adjust zone statistics to account for the real pages allocated
930 * in vm_page_create(). [Q: is this really what we want?]
931 */
932 vm_page_zone->count += vm_page_pages;
6d2010ae 933 vm_page_zone->sum_count += vm_page_pages;
1c79356b 934 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
1c79356b
A
935}
936
937/*
938 * Routine: vm_page_create
939 * Purpose:
940 * After the VM system is up, machine-dependent code
941 * may stumble across more physical memory. For example,
942 * memory that it was reserving for a frame buffer.
943 * vm_page_create turns this memory into available pages.
944 */
945
946void
947vm_page_create(
55e303ae
A
948 ppnum_t start,
949 ppnum_t end)
1c79356b 950{
55e303ae
A
951 ppnum_t phys_page;
952 vm_page_t m;
1c79356b 953
55e303ae
A
954 for (phys_page = start;
955 phys_page < end;
956 phys_page++) {
6d2010ae 957 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1c79356b
A
958 == VM_PAGE_NULL)
959 vm_page_more_fictitious();
960
6d2010ae 961 m->fictitious = FALSE;
0b4c1975 962 pmap_clear_noencrypt(phys_page);
6d2010ae 963
1c79356b
A
964 vm_page_pages++;
965 vm_page_release(m);
966 }
967}
968
969/*
970 * vm_page_hash:
971 *
972 * Distributes the object/offset key pair among hash buckets.
973 *
55e303ae 974 * NOTE: The bucket count must be a power of 2
1c79356b
A
975 */
976#define vm_page_hash(object, offset) (\
b0d623f7 977 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1c79356b
A
978 & vm_page_hash_mask)
979
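/*
 * Usage sketch (added commentary, not original source): the insert,
 * replace, remove and lookup paths below all derive the bucket and its
 * striped spinlock from this macro in the same way:
 *
 *	hash_id     = vm_page_hash(object, offset);
 *	bucket      = &vm_page_buckets[hash_id];
 *	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 */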
2d21ac55 980
1c79356b
A
981/*
982 * vm_page_insert: [ internal use only ]
983 *
984 * Inserts the given mem entry into the object/object-page
985 * table and object list.
986 *
987 * The object must be locked.
988 */
1c79356b
A
989void
990vm_page_insert(
2d21ac55
A
991 vm_page_t mem,
992 vm_object_t object,
993 vm_object_offset_t offset)
994{
316670eb 995 vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
2d21ac55
A
996}
997
4a3eedf9 998void
2d21ac55
A
999vm_page_insert_internal(
1000 vm_page_t mem,
1001 vm_object_t object,
1002 vm_object_offset_t offset,
b0d623f7 1003 boolean_t queues_lock_held,
316670eb
A
1004 boolean_t insert_in_hash,
1005 boolean_t batch_pmap_op)
1c79356b 1006{
b0d623f7
A
1007 vm_page_bucket_t *bucket;
1008 lck_spin_t *bucket_lock;
1009 int hash_id;
1c79356b
A
1010
1011 XPR(XPR_VM_PAGE,
1012 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7 1013 object, offset, mem, 0,0);
316670eb
A
1014#if 0
1015 /*
1016 * we may not hold the page queue lock
1017 * so this check isn't safe to make
1018 */
1c79356b 1019 VM_PAGE_CHECK(mem);
316670eb 1020#endif
1c79356b 1021
2d21ac55
A
1022 if (object == vm_submap_object) {
1023 /* the vm_submap_object is only a placeholder for submaps */
1024 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1025 }
1026
1027 vm_object_lock_assert_exclusive(object);
1028#if DEBUG
b0d623f7
A
1029 lck_mtx_assert(&vm_page_queue_lock,
1030 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1031 : LCK_MTX_ASSERT_NOTOWNED);
1032#endif /* DEBUG */
1033
1034 if (insert_in_hash == TRUE) {
1035#if DEBUG
1036 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1037 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1038 "already in (obj=%p,off=0x%llx)",
1039 mem, object, offset, mem->object, mem->offset);
91447636 1040#endif
6d2010ae 1041 assert(!object->internal || offset < object->vo_size);
1c79356b 1042
b0d623f7
A
1043 /* only insert "pageout" pages into "pageout" objects,
1044 * and normal pages into normal objects */
1045 assert(object->pageout == mem->pageout);
91447636 1046
b0d623f7
A
1047 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1048
1049 /*
1050 * Record the object/offset pair in this page
1051 */
1c79356b 1052
b0d623f7
A
1053 mem->object = object;
1054 mem->offset = offset;
1c79356b 1055
b0d623f7
A
1056 /*
1057 * Insert it into the object_object/offset hash table
1058 */
1059 hash_id = vm_page_hash(object, offset);
1060 bucket = &vm_page_buckets[hash_id];
1061 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1062
1063 lck_spin_lock(bucket_lock);
1c79356b 1064
b0d623f7
A
1065 mem->next = bucket->pages;
1066 bucket->pages = mem;
1c79356b 1067#if MACH_PAGE_HASH_STATS
b0d623f7
A
1068 if (++bucket->cur_count > bucket->hi_count)
1069 bucket->hi_count = bucket->cur_count;
1c79356b 1070#endif /* MACH_PAGE_HASH_STATS */
1c79356b 1071
b0d623f7
A
1072 lck_spin_unlock(bucket_lock);
1073 }
6d2010ae 1074
316670eb
A
1075 {
1076 unsigned int cache_attr;
6d2010ae
A
1077
1078 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1079
1080 if (cache_attr != VM_WIMG_USE_DEFAULT) {
316670eb 1081 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
6d2010ae
A
1082 }
1083 }
1c79356b
A
1084 /*
1085 * Now link into the object's list of backed pages.
1086 */
1087
91447636 1088 VM_PAGE_INSERT(mem, object);
1c79356b
A
1089 mem->tabled = TRUE;
1090
1091 /*
1092 * Show that the object has one more resident page.
1093 */
1094
1095 object->resident_page_count++;
b0d623f7
A
1096 if (VM_PAGE_WIRED(mem)) {
1097 object->wired_page_count++;
1098 }
1099 assert(object->resident_page_count >= object->wired_page_count);
91447636 1100
b0d623f7 1101 assert(!mem->reusable);
2d21ac55 1102
b0d623f7
A
1103 if (object->purgable == VM_PURGABLE_VOLATILE) {
1104 if (VM_PAGE_WIRED(mem)) {
1105 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1106 } else {
1107 OSAddAtomic(1, &vm_page_purgeable_count);
1108 }
593a1d5f
A
1109 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1110 mem->throttled) {
b0d623f7
A
1111 /*
1112 * This page belongs to a purged VM object but hasn't
1113 * been purged (because it was "busy").
1114 * It's in the "throttled" queue and hence not
1115 * visible to vm_pageout_scan(). Move it to a pageable
1116 * queue, so that it can eventually be reclaimed, instead
1117 * of lingering in the "empty" object.
1118 */
593a1d5f 1119 if (queues_lock_held == FALSE)
b0d623f7 1120 vm_page_lockspin_queues();
593a1d5f 1121 vm_page_deactivate(mem);
2d21ac55
A
1122 if (queues_lock_held == FALSE)
1123 vm_page_unlock_queues();
91447636 1124 }
1c79356b
A
1125}
1126
1127/*
1128 * vm_page_replace:
1129 *
1130 * Exactly like vm_page_insert, except that we first
1131 * remove any existing page at the given offset in object.
1132 *
b0d623f7 1133 * The object must be locked.
1c79356b 1134 */
1c79356b
A
1135void
1136vm_page_replace(
1137 register vm_page_t mem,
1138 register vm_object_t object,
1139 register vm_object_offset_t offset)
1140{
0c530ab8
A
1141 vm_page_bucket_t *bucket;
1142 vm_page_t found_m = VM_PAGE_NULL;
b0d623f7
A
1143 lck_spin_t *bucket_lock;
1144 int hash_id;
1c79356b 1145
316670eb
A
1146#if 0
1147 /*
1148 * we don't hold the page queue lock
1149 * so this check isn't safe to make
1150 */
1c79356b 1151 VM_PAGE_CHECK(mem);
316670eb 1152#endif
2d21ac55 1153 vm_object_lock_assert_exclusive(object);
91447636 1154#if DEBUG
91447636
A
1155 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1156 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1157 "already in (obj=%p,off=0x%llx)",
1158 mem, object, offset, mem->object, mem->offset);
b0d623f7 1159 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
91447636 1160#endif
1c79356b
A
1161 /*
1162 * Record the object/offset pair in this page
1163 */
1164
1165 mem->object = object;
1166 mem->offset = offset;
1167
1168 /*
1169 * Insert it into the object_object/offset hash table,
1170 * replacing any page that might have been there.
1171 */
1172
b0d623f7
A
1173 hash_id = vm_page_hash(object, offset);
1174 bucket = &vm_page_buckets[hash_id];
1175 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1176
1177 lck_spin_lock(bucket_lock);
0c530ab8 1178
1c79356b
A
1179 if (bucket->pages) {
1180 vm_page_t *mp = &bucket->pages;
b0d623f7 1181 vm_page_t m = *mp;
0c530ab8 1182
1c79356b
A
1183 do {
1184 if (m->object == object && m->offset == offset) {
1185 /*
0c530ab8 1186 * Remove old page from hash list
1c79356b
A
1187 */
1188 *mp = m->next;
1c79356b 1189
0c530ab8 1190 found_m = m;
1c79356b
A
1191 break;
1192 }
1193 mp = &m->next;
91447636 1194 } while ((m = *mp));
0c530ab8 1195
1c79356b
A
1196 mem->next = bucket->pages;
1197 } else {
1198 mem->next = VM_PAGE_NULL;
1199 }
0c530ab8
A
1200 /*
1201 * insert new page at head of hash list
1202 */
1c79356b 1203 bucket->pages = mem;
0c530ab8 1204
b0d623f7 1205 lck_spin_unlock(bucket_lock);
1c79356b 1206
0c530ab8
A
1207 if (found_m) {
1208 /*
1209 * there was already a page at the specified
1210 * offset for this object... remove it from
1211 * the object and free it back to the free list
1212 */
b0d623f7 1213 vm_page_free_unlocked(found_m, FALSE);
91447636 1214 }
316670eb 1215 vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1c79356b
A
1216}
1217
1218/*
1219 * vm_page_remove: [ internal use only ]
1220 *
1221 * Removes the given mem entry from the object/offset-page
1222 * table and the object page list.
1223 *
b0d623f7 1224 * The object must be locked.
1c79356b
A
1225 */
1226
1227void
1228vm_page_remove(
b0d623f7
A
1229 vm_page_t mem,
1230 boolean_t remove_from_hash)
1c79356b 1231{
b0d623f7
A
1232 vm_page_bucket_t *bucket;
1233 vm_page_t this;
1234 lck_spin_t *bucket_lock;
1235 int hash_id;
1c79356b
A
1236
1237 XPR(XPR_VM_PAGE,
1238 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7
A
1239 mem->object, mem->offset,
1240 mem, 0,0);
1241
2d21ac55 1242 vm_object_lock_assert_exclusive(mem->object);
1c79356b
A
1243 assert(mem->tabled);
1244 assert(!mem->cleaning);
316670eb
A
1245 assert(!mem->laundry);
1246#if 0
1247 /*
1248 * we don't hold the page queue lock
1249 * so this check isn't safe to make
1250 */
1c79356b 1251 VM_PAGE_CHECK(mem);
316670eb 1252#endif
b0d623f7
A
1253 if (remove_from_hash == TRUE) {
1254 /*
1255 * Remove from the object_object/offset hash table
1256 */
1257 hash_id = vm_page_hash(mem->object, mem->offset);
1258 bucket = &vm_page_buckets[hash_id];
1259 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
91447636 1260
b0d623f7 1261 lck_spin_lock(bucket_lock);
1c79356b 1262
b0d623f7
A
1263 if ((this = bucket->pages) == mem) {
1264 /* optimize for common case */
1c79356b 1265
b0d623f7
A
1266 bucket->pages = mem->next;
1267 } else {
1268 vm_page_t *prev;
1c79356b 1269
b0d623f7
A
1270 for (prev = &this->next;
1271 (this = *prev) != mem;
1272 prev = &this->next)
1273 continue;
1274 *prev = this->next;
1275 }
1c79356b 1276#if MACH_PAGE_HASH_STATS
b0d623f7 1277 bucket->cur_count--;
1c79356b 1278#endif /* MACH_PAGE_HASH_STATS */
1c79356b 1279
b0d623f7
A
1280 lck_spin_unlock(bucket_lock);
1281 }
1c79356b
A
1282 /*
1283 * Now remove from the object's list of backed pages.
1284 */
1285
91447636 1286 VM_PAGE_REMOVE(mem);
1c79356b
A
1287
1288 /*
1289 * And show that the object has one fewer resident
1290 * page.
1291 */
1292
b0d623f7 1293 assert(mem->object->resident_page_count > 0);
1c79356b 1294 mem->object->resident_page_count--;
6d2010ae
A
1295
1296 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1297 if (mem->object->resident_page_count == 0)
1298 vm_object_cache_remove(mem->object);
1299 }
1300
b0d623f7
A
1301 if (VM_PAGE_WIRED(mem)) {
1302 assert(mem->object->wired_page_count > 0);
1303 mem->object->wired_page_count--;
1304 }
1305 assert(mem->object->resident_page_count >=
1306 mem->object->wired_page_count);
1307 if (mem->reusable) {
1308 assert(mem->object->reusable_page_count > 0);
1309 mem->object->reusable_page_count--;
1310 assert(mem->object->reusable_page_count <=
1311 mem->object->resident_page_count);
1312 mem->reusable = FALSE;
1313 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1314 vm_page_stats_reusable.reused_remove++;
1315 } else if (mem->object->all_reusable) {
1316 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1317 vm_page_stats_reusable.reused_remove++;
1318 }
1c79356b 1319
593a1d5f 1320 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
b0d623f7
A
1321 if (VM_PAGE_WIRED(mem)) {
1322 assert(vm_page_purgeable_wired_count > 0);
1323 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1324 } else {
1325 assert(vm_page_purgeable_count > 0);
1326 OSAddAtomic(-1, &vm_page_purgeable_count);
1327 }
91447636 1328 }
6d2010ae
A
1329 if (mem->object->set_cache_attr == TRUE)
1330 pmap_set_cache_attributes(mem->phys_page, 0);
1331
1c79356b
A
1332 mem->tabled = FALSE;
1333 mem->object = VM_OBJECT_NULL;
91447636 1334 mem->offset = (vm_object_offset_t) -1;
1c79356b
A
1335}
1336
b0d623f7 1337
1c79356b
A
1338/*
1339 * vm_page_lookup:
1340 *
1341 * Returns the page associated with the object/offset
1342 * pair specified; if none is found, VM_PAGE_NULL is returned.
1343 *
1344 * The object must be locked. No side effects.
1345 */
1346
91447636
A
1347unsigned long vm_page_lookup_hint = 0;
1348unsigned long vm_page_lookup_hint_next = 0;
1349unsigned long vm_page_lookup_hint_prev = 0;
1350unsigned long vm_page_lookup_hint_miss = 0;
2d21ac55
A
1351unsigned long vm_page_lookup_bucket_NULL = 0;
1352unsigned long vm_page_lookup_miss = 0;
1353
91447636 1354
1c79356b
A
1355vm_page_t
1356vm_page_lookup(
b0d623f7
A
1357 vm_object_t object,
1358 vm_object_offset_t offset)
1c79356b 1359{
b0d623f7
A
1360 vm_page_t mem;
1361 vm_page_bucket_t *bucket;
1362 queue_entry_t qe;
1363 lck_spin_t *bucket_lock;
1364 int hash_id;
91447636 1365
2d21ac55 1366 vm_object_lock_assert_held(object);
91447636 1367 mem = object->memq_hint;
2d21ac55 1368
91447636
A
1369 if (mem != VM_PAGE_NULL) {
1370 assert(mem->object == object);
2d21ac55 1371
91447636
A
1372 if (mem->offset == offset) {
1373 vm_page_lookup_hint++;
1374 return mem;
1375 }
1376 qe = queue_next(&mem->listq);
2d21ac55 1377
91447636
A
1378 if (! queue_end(&object->memq, qe)) {
1379 vm_page_t next_page;
1380
1381 next_page = (vm_page_t) qe;
1382 assert(next_page->object == object);
2d21ac55 1383
91447636
A
1384 if (next_page->offset == offset) {
1385 vm_page_lookup_hint_next++;
1386 object->memq_hint = next_page; /* new hint */
1387 return next_page;
1388 }
1389 }
1390 qe = queue_prev(&mem->listq);
2d21ac55 1391
91447636
A
1392 if (! queue_end(&object->memq, qe)) {
1393 vm_page_t prev_page;
1394
1395 prev_page = (vm_page_t) qe;
1396 assert(prev_page->object == object);
2d21ac55 1397
91447636
A
1398 if (prev_page->offset == offset) {
1399 vm_page_lookup_hint_prev++;
1400 object->memq_hint = prev_page; /* new hint */
1401 return prev_page;
1402 }
1403 }
1404 }
1c79356b 1405 /*
2d21ac55 1406 * Search the hash table for this object/offset pair
1c79356b 1407 */
b0d623f7
A
1408 hash_id = vm_page_hash(object, offset);
1409 bucket = &vm_page_buckets[hash_id];
1c79356b 1410
2d21ac55
A
1411 /*
1412 * since we hold the object lock, we are guaranteed that no
1413 * new pages can be inserted into this object... this in turn
 1414 * guarantees that the page we're looking for can't exist
1415 * if the bucket it hashes to is currently NULL even when looked
1416 * at outside the scope of the hash bucket lock... this is a
 1417 * really cheap optimization to avoid taking the lock
1418 */
1419 if (bucket->pages == VM_PAGE_NULL) {
1420 vm_page_lookup_bucket_NULL++;
1421
1422 return (VM_PAGE_NULL);
1423 }
b0d623f7
A
1424 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1425
1426 lck_spin_lock(bucket_lock);
0c530ab8 1427
1c79356b 1428 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
316670eb
A
1429#if 0
1430 /*
1431 * we don't hold the page queue lock
1432 * so this check isn't safe to make
1433 */
1c79356b 1434 VM_PAGE_CHECK(mem);
316670eb 1435#endif
1c79356b
A
1436 if ((mem->object == object) && (mem->offset == offset))
1437 break;
1438 }
b0d623f7 1439 lck_spin_unlock(bucket_lock);
55e303ae 1440
91447636
A
1441 if (mem != VM_PAGE_NULL) {
1442 if (object->memq_hint != VM_PAGE_NULL) {
1443 vm_page_lookup_hint_miss++;
1444 }
1445 assert(mem->object == object);
1446 object->memq_hint = mem;
2d21ac55
A
1447 } else
1448 vm_page_lookup_miss++;
91447636
A
1449
1450 return(mem);
1451}
1452
1453
1c79356b
A
1454/*
1455 * vm_page_rename:
1456 *
1457 * Move the given memory entry from its
1458 * current object to the specified target object/offset.
1459 *
1460 * The object must be locked.
1461 */
1462void
1463vm_page_rename(
1464 register vm_page_t mem,
1465 register vm_object_t new_object,
2d21ac55
A
1466 vm_object_offset_t new_offset,
1467 boolean_t encrypted_ok)
1c79356b
A
1468{
1469 assert(mem->object != new_object);
2d21ac55 1470
91447636
A
1471 /*
1472 * ENCRYPTED SWAP:
1473 * The encryption key is based on the page's memory object
1474 * (aka "pager") and paging offset. Moving the page to
1475 * another VM object changes its "pager" and "paging_offset"
2d21ac55
A
1476 * so it has to be decrypted first, or we would lose the key.
1477 *
1478 * One exception is VM object collapsing, where we transfer pages
1479 * from one backing object to its parent object. This operation also
1480 * transfers the paging information, so the <pager,paging_offset> info
1481 * should remain consistent. The caller (vm_object_do_collapse())
1482 * sets "encrypted_ok" in this case.
91447636 1483 */
2d21ac55 1484 if (!encrypted_ok && mem->encrypted) {
91447636
A
1485 panic("vm_page_rename: page %p is encrypted\n", mem);
1486 }
2d21ac55 1487
b0d623f7
A
1488 XPR(XPR_VM_PAGE,
1489 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1490 new_object, new_offset,
1491 mem, 0,0);
1492
1c79356b
A
1493 /*
1494 * Changes to mem->object require the page lock because
1495 * the pageout daemon uses that lock to get the object.
1496 */
b0d623f7 1497 vm_page_lockspin_queues();
1c79356b 1498
b0d623f7 1499 vm_page_remove(mem, TRUE);
316670eb 1500 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1c79356b 1501
1c79356b
A
1502 vm_page_unlock_queues();
1503}
1504
1505/*
1506 * vm_page_init:
1507 *
1508 * Initialize the fields in a new page.
1509 * This takes a structure with random values and initializes it
1510 * so that it can be given to vm_page_release or vm_page_insert.
1511 */
1512void
1513vm_page_init(
1514 vm_page_t mem,
0b4c1975
A
1515 ppnum_t phys_page,
1516 boolean_t lopage)
1c79356b 1517{
91447636 1518 assert(phys_page);
7ddcb079
A
1519
1520#if DEBUG
1521 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1522 if (!(pmap_valid_page(phys_page))) {
1523 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1524 }
1525 }
1526#endif
1c79356b 1527 *mem = vm_page_template;
55e303ae 1528 mem->phys_page = phys_page;
6d2010ae
A
1529#if 0
1530 /*
1531 * we're leaving this turned off for now... currently pages
1532 * come off the free list and are either immediately dirtied/referenced
1533 * due to zero-fill or COW faults, or are used to read or write files...
1534 * in the file I/O case, the UPL mechanism takes care of clearing
1535 * the state of the HW ref/mod bits in a somewhat fragile way.
1536 * Since we may change the way this works in the future (to toughen it up),
1537 * I'm leaving this as a reminder of where these bits could get cleared
1538 */
1539
1540 /*
1541 * make sure both the h/w referenced and modified bits are
1542 * clear at this point... we are especially dependent on
1543 * not finding a 'stale' h/w modified in a number of spots
1544 * once this page goes back into use
1545 */
1546 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1547#endif
0b4c1975 1548 mem->lopage = lopage;
1c79356b
A
1549}
1550
1551/*
1552 * vm_page_grab_fictitious:
1553 *
1554 * Remove a fictitious page from the free list.
1555 * Returns VM_PAGE_NULL if there are no free pages.
1556 */
1557int c_vm_page_grab_fictitious = 0;
6d2010ae 1558int c_vm_page_grab_fictitious_failed = 0;
1c79356b
A
1559int c_vm_page_release_fictitious = 0;
1560int c_vm_page_more_fictitious = 0;
1561
1562vm_page_t
2d21ac55 1563vm_page_grab_fictitious_common(
b0d623f7 1564 ppnum_t phys_addr)
1c79356b 1565{
6d2010ae
A
1566 vm_page_t m;
1567
1568 if ((m = (vm_page_t)zget(vm_page_zone))) {
1c79356b 1569
0b4c1975 1570 vm_page_init(m, phys_addr, FALSE);
1c79356b 1571 m->fictitious = TRUE;
1c79356b 1572
6d2010ae
A
1573 c_vm_page_grab_fictitious++;
1574 } else
1575 c_vm_page_grab_fictitious_failed++;
1576
1c79356b
A
1577 return m;
1578}
1579
2d21ac55
A
1580vm_page_t
1581vm_page_grab_fictitious(void)
1582{
1583 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1584}
1585
1586vm_page_t
1587vm_page_grab_guard(void)
1588{
1589 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1590}
1591
6d2010ae 1592
1c79356b
A
1593/*
1594 * vm_page_release_fictitious:
1595 *
6d2010ae 1596 * Release a fictitious page to the zone pool
1c79356b 1597 */
1c79356b
A
1598void
1599vm_page_release_fictitious(
6d2010ae 1600 vm_page_t m)
1c79356b
A
1601{
1602 assert(!m->free);
1c79356b 1603 assert(m->fictitious);
2d21ac55
A
1604 assert(m->phys_page == vm_page_fictitious_addr ||
1605 m->phys_page == vm_page_guard_addr);
1c79356b
A
1606
1607 c_vm_page_release_fictitious++;
6d2010ae 1608
91447636 1609 zfree(vm_page_zone, m);
1c79356b
A
1610}
1611
1612/*
1613 * vm_page_more_fictitious:
1614 *
6d2010ae 1615 * Add more fictitious pages to the zone.
1c79356b
A
1616 * Allowed to block. This routine is way intimate
1617 * with the zones code, for several reasons:
1618 * 1. we need to carve some page structures out of physical
1619 * memory before zones work, so they _cannot_ come from
1620 * the zone_map.
1621 * 2. the zone needs to be collectable in order to prevent
1622 * growth without bound. These structures are used by
1623 * the device pager (by the hundreds and thousands), as
1624 * private pages for pageout, and as blocking pages for
1625 * pagein. Temporary bursts in demand should not result in
1626 * permanent allocation of a resource.
1627 * 3. To smooth allocation humps, we allocate single pages
1628 * with kernel_memory_allocate(), and cram them into the
6d2010ae 1629 * zone.
1c79356b
A
1630 */
1631
1632void vm_page_more_fictitious(void)
1633{
6d2010ae
A
1634 vm_offset_t addr;
1635 kern_return_t retval;
1c79356b
A
1636
1637 c_vm_page_more_fictitious++;
1638
1c79356b
A
1639 /*
1640 * Allocate a single page from the zone_map. Do not wait if no physical
1641 * pages are immediately available, and do not zero the space. We need
1642 * our own blocking lock here to prevent having multiple,
1643 * simultaneous requests from piling up on the zone_map lock. Exactly
1644 * one (of our) threads should be potentially waiting on the map lock.
1645 * If winner is not vm-privileged, then the page allocation will fail,
1646 * and it will temporarily block here in the vm_page_wait().
1647 */
b0d623f7 1648 lck_mtx_lock(&vm_page_alloc_lock);
1c79356b
A
1649 /*
1650 * If another thread allocated space, just bail out now.
1651 */
1652 if (zone_free_count(vm_page_zone) > 5) {
1653 /*
1654 * The number "5" is a small number that is larger than the
1655 * number of fictitious pages that any single caller will
1656 * attempt to allocate. Otherwise, a thread will attempt to
1657 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1658 * release all of the resources and locks already acquired,
1659 * and then call this routine. This routine finds the pages
1660 * that the caller released, so fails to allocate new space.
1661 * The process repeats infinitely. The largest known number
1662 * of fictitious pages required in this manner is 2. 5 is
1663 * simply a somewhat larger number.
1664 */
b0d623f7 1665 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
1666 return;
1667 }
1668
91447636
A
1669 retval = kernel_memory_allocate(zone_map,
1670 &addr, PAGE_SIZE, VM_PROT_ALL,
1671 KMA_KOBJECT|KMA_NOPAGEWAIT);
1672 if (retval != KERN_SUCCESS) {
1c79356b 1673 /*
6d2010ae 1674 * No page was available. Drop the
1c79356b
A
1675 * lock to give another thread a chance at it, and
1676 * wait for the pageout daemon to make progress.
1677 */
b0d623f7 1678 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
1679 vm_page_wait(THREAD_UNINT);
1680 return;
1681 }
7ddcb079 1682 zcram(vm_page_zone, addr, PAGE_SIZE);
6d2010ae 1683
b0d623f7 1684 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
1685}
1686
1c79356b
A
1687
1688/*
1689 * vm_pool_low():
1690 *
1691 * Return true if it is not likely that a non-vm_privileged thread
1692 * can get memory without blocking. Advisory only, since the
1693 * situation may change under us.
1694 */
1695int
1696vm_pool_low(void)
1697{
1698 /* No locking, at worst we will fib. */
b0d623f7 1699 return( vm_page_free_count <= vm_page_free_reserved );
1c79356b
A
1700}
1701
0c530ab8
A
1702
1703
1704/*
1705 * this is an interface to support bring-up of drivers
1706 * on platforms with physical memory > 4G...
1707 */
1708int vm_himemory_mode = 0;
1709
1710
1711/*
1712 * this interface exists to support hardware controllers
1713 * incapable of generating DMAs with more than 32 bits
1714 * of address on platforms with physical memory > 4G...
1715 */
0b4c1975
A
1716unsigned int vm_lopages_allocated_q = 0;
1717unsigned int vm_lopages_allocated_cpm_success = 0;
1718unsigned int vm_lopages_allocated_cpm_failed = 0;
2d21ac55 1719queue_head_t vm_lopage_queue_free;
0c530ab8
A
1720
1721vm_page_t
1722vm_page_grablo(void)
1723{
0b4c1975 1724 vm_page_t mem;
0c530ab8 1725
0b4c1975 1726 if (vm_lopage_needed == FALSE)
0c530ab8
A
1727 return (vm_page_grab());
1728
b0d623f7 1729 lck_mtx_lock_spin(&vm_page_queue_free_lock);
0c530ab8 1730
0b4c1975
A
1731 if ( !queue_empty(&vm_lopage_queue_free)) {
1732 queue_remove_first(&vm_lopage_queue_free,
1733 mem,
1734 vm_page_t,
1735 pageq);
1736 assert(vm_lopage_free_count);
0c530ab8 1737
0b4c1975
A
1738 vm_lopage_free_count--;
1739 vm_lopages_allocated_q++;
1740
1741 if (vm_lopage_free_count < vm_lopage_lowater)
1742 vm_lopage_refill = TRUE;
0c530ab8 1743
0b4c1975 1744 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 1745 } else {
0b4c1975
A
1746 lck_mtx_unlock(&vm_page_queue_free_lock);
1747
1748 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1749
1750 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1751 vm_lopages_allocated_cpm_failed++;
1752 lck_mtx_unlock(&vm_page_queue_free_lock);
1753
1754 return (VM_PAGE_NULL);
1755 }
1756 mem->busy = TRUE;
1757
1758 vm_page_lockspin_queues();
1759
1760 mem->gobbled = FALSE;
1761 vm_page_gobble_count--;
1762 vm_page_wire_count--;
1763
1764 vm_lopages_allocated_cpm_success++;
1765 vm_page_unlock_queues();
0c530ab8 1766 }
0b4c1975
A
1767 assert(mem->busy);
1768 assert(!mem->free);
1769 assert(!mem->pmapped);
1770 assert(!mem->wpmapped);
7ddcb079 1771 assert(!pmap_is_noencrypt(mem->phys_page));
0b4c1975
A
1772
1773 mem->pageq.next = NULL;
1774 mem->pageq.prev = NULL;
0c530ab8
A
1775
1776 return (mem);
1777}
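
/*
 * Illustrative sketch (hypothetical, for exposition only): a driver that can
 * only DMA to 32-bit physical addresses would come through this interface;
 * on success the returned page's physical page number is expected to fit
 * below 4G.
 */
#if 0	/* example only -- example_grab_dma32_page() is a made-up helper */
static vm_page_t
example_grab_dma32_page(void)
{
	vm_page_t	m;

	m = vm_page_grablo();
	if (m != VM_PAGE_NULL)
		assert(m->phys_page <= atop(0xffffffff));

	return (m);
}
#endif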
1778
6d2010ae 1779
1c79356b
A
1780/*
1781 * vm_page_grab:
1782 *
2d21ac55
A
1783 * first try to grab a page from the per-cpu free list...
1784 * this must be done while pre-emption is disabled... if
1785 * a page is available, we're done...
1786 * if no page is available, grab the vm_page_queue_free_lock
1787 * and see if current number of free pages would allow us
1788 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1789 * if there are pages available, disable preemption and
1790 * recheck the state of the per-cpu free list... we could
1791 * have been preempted and moved to a different cpu, or
1792 * some other thread could have re-filled it... if still
1793 * empty, figure out how many pages we can steal from the
1794 * global free queue and move to the per-cpu queue...
1795 * return 1 of these pages when done... only wakeup the
1796 * pageout_scan thread if we moved pages from the global
1797 * list... no need for the wakeup if we've satisfied the
1798 * request from the per-cpu queue.
1c79356b
A
1799 */
1800
2d21ac55
A
1801#define COLOR_GROUPS_TO_STEAL 4
1802
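/*
 * Worked example (assuming a hypothetical 8-color configuration): the
 * per-cpu refill in vm_page_grab() below would try to steal
 * COLOR_GROUPS_TO_STEAL * vm_colors = 4 * 8 = 32 pages at a time, clipped
 * to (vm_page_free_count - vm_page_free_reserved), and it falls back to
 * stealing a single page once the free pool is at the reserved mark.
 */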
1c79356b
A
1803
1804vm_page_t
2d21ac55 1805vm_page_grab( void )
1c79356b 1806{
2d21ac55
A
1807 vm_page_t mem;
1808
1809
1810 disable_preemption();
1811
1812 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1813return_page_from_cpu_list:
1814 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1815 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1816 mem->pageq.next = NULL;
1817
1818 enable_preemption();
1819
1820 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1821 assert(mem->tabled == FALSE);
1822 assert(mem->object == VM_OBJECT_NULL);
1823 assert(!mem->laundry);
1824 assert(!mem->free);
1825 assert(pmap_verify_free(mem->phys_page));
1826 assert(mem->busy);
1827 assert(!mem->encrypted);
1828 assert(!mem->pmapped);
4a3eedf9 1829 assert(!mem->wpmapped);
6d2010ae
A
1830 assert(!mem->active);
1831 assert(!mem->inactive);
1832 assert(!mem->throttled);
1833 assert(!mem->speculative);
7ddcb079 1834 assert(!pmap_is_noencrypt(mem->phys_page));
2d21ac55
A
1835
1836 return mem;
1837 }
1838 enable_preemption();
1839
1c79356b 1840
1c79356b
A
1841 /*
1842 * Optionally produce warnings if the wire or gobble
1843 * counts exceed some threshold.
1844 */
1845 if (vm_page_wire_count_warning > 0
1846 && vm_page_wire_count >= vm_page_wire_count_warning) {
1847 printf("mk: vm_page_grab(): high wired page count of %d\n",
1848 vm_page_wire_count);
1849 assert(vm_page_wire_count < vm_page_wire_count_warning);
1850 }
1851 if (vm_page_gobble_count_warning > 0
1852 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1853 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1854 vm_page_gobble_count);
1855 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1856 }
1857
b0d623f7
A
1858 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1859
1c79356b
A
1860 /*
1861 * Only let privileged threads (involved in pageout)
1862 * dip into the reserved pool.
1863 */
1c79356b 1864 if ((vm_page_free_count < vm_page_free_reserved) &&
91447636 1865 !(current_thread()->options & TH_OPT_VMPRIV)) {
b0d623f7 1866 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 1867 mem = VM_PAGE_NULL;
1c79356b 1868 }
2d21ac55
A
1869 else {
1870 vm_page_t head;
1871 vm_page_t tail;
1872 unsigned int pages_to_steal;
1873 unsigned int color;
1c79356b 1874
2d21ac55 1875 while ( vm_page_free_count == 0 ) {
1c79356b 1876
b0d623f7 1877 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
1878 /*
1879 * must be a privileged thread to be
1880 * in this state since a non-privileged
1881 * thread would have bailed if we were
1882 * under the vm_page_free_reserved mark
1883 */
1884 VM_PAGE_WAIT();
b0d623f7 1885 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
1886 }
1887
1888 disable_preemption();
1889
1890 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
b0d623f7 1891 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
1892
1893 /*
1894 * we got preempted and moved to another processor
1895 * or we got preempted and someone else ran and filled the cache
1896 */
1897 goto return_page_from_cpu_list;
1898 }
1899 if (vm_page_free_count <= vm_page_free_reserved)
1900 pages_to_steal = 1;
1901 else {
1902 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1903
1904 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1905 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1906 }
1907 color = PROCESSOR_DATA(current_processor(), start_color);
1908 head = tail = NULL;
1909
1910 while (pages_to_steal--) {
1911 if (--vm_page_free_count < vm_page_free_count_minimum)
1912 vm_page_free_count_minimum = vm_page_free_count;
1913
1914 while (queue_empty(&vm_page_queue_free[color]))
1915 color = (color + 1) & vm_color_mask;
1916
1917 queue_remove_first(&vm_page_queue_free[color],
1918 mem,
1919 vm_page_t,
1920 pageq);
1921 mem->pageq.next = NULL;
1922 mem->pageq.prev = NULL;
1923
6d2010ae
A
1924 assert(!mem->active);
1925 assert(!mem->inactive);
1926 assert(!mem->throttled);
1927 assert(!mem->speculative);
1928
2d21ac55
A
1929 color = (color + 1) & vm_color_mask;
1930
1931 if (head == NULL)
1932 head = mem;
1933 else
1934 tail->pageq.next = (queue_t)mem;
1935 tail = mem;
1936
1937 mem->pageq.prev = NULL;
1938 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1939 assert(mem->tabled == FALSE);
1940 assert(mem->object == VM_OBJECT_NULL);
1941 assert(!mem->laundry);
1942 assert(mem->free);
1943 mem->free = FALSE;
1944
1945 assert(pmap_verify_free(mem->phys_page));
1946 assert(mem->busy);
1947 assert(!mem->free);
1948 assert(!mem->encrypted);
1949 assert(!mem->pmapped);
4a3eedf9 1950 assert(!mem->wpmapped);
7ddcb079 1951 assert(!pmap_is_noencrypt(mem->phys_page));
2d21ac55
A
1952 }
1953 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1954 PROCESSOR_DATA(current_processor(), start_color) = color;
1955
1956 /*
1957 * satisfy this request
1958 */
1959 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1960 mem = head;
1961 mem->pageq.next = NULL;
91447636 1962
b0d623f7 1963 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
1964
1965 enable_preemption();
1966 }
1c79356b
A
1967 /*
1968 * Decide if we should poke the pageout daemon.
1969 * We do this if the free count is less than the low
1970 * water mark, or if the free count is less than the high
1971 * water mark (but above the low water mark) and the inactive
1972 * count is less than its target.
1973 *
1974 * We don't have the counts locked ... if they change a little,
1975 * it doesn't really matter.
1976 */
1c79356b 1977 if ((vm_page_free_count < vm_page_free_min) ||
316670eb
A
1978 ((vm_page_free_count < vm_page_free_target) &&
1979 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1980 thread_wakeup((event_t) &vm_page_free_wanted);
2d21ac55 1981
6d2010ae
A
1982 VM_CHECK_MEMORYSTATUS;
1983
55e303ae 1984// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1c79356b
A
1985
1986 return mem;
1987}
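
/*
 * Note for callers: vm_page_grab() hands back a busy page that is not yet
 * tabled in any VM object; the typical pattern is to follow it with
 * vm_page_insert(), as vm_page_alloc() does below.
 */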
1988
1989/*
1990 * vm_page_release:
1991 *
1992 * Return a page to the free list.
1993 */
1994
1995void
1996vm_page_release(
1997 register vm_page_t mem)
1998{
2d21ac55 1999 unsigned int color;
b0d623f7
A
2000 int need_wakeup = 0;
2001 int need_priv_wakeup = 0;
55e303ae 2002
6d2010ae 2003
1c79356b 2004 assert(!mem->private && !mem->fictitious);
b0d623f7
A
2005 if (vm_page_free_verify) {
2006 assert(pmap_verify_free(mem->phys_page));
2007 }
55e303ae 2008// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1c79356b 2009
7ddcb079
A
2010 pmap_clear_noencrypt(mem->phys_page);
2011
b0d623f7 2012 lck_mtx_lock_spin(&vm_page_queue_free_lock);
91447636 2013#if DEBUG
1c79356b
A
2014 if (mem->free)
2015 panic("vm_page_release");
91447636 2016#endif
6d2010ae 2017
2d21ac55 2018 assert(mem->busy);
91447636
A
2019 assert(!mem->laundry);
2020 assert(mem->object == VM_OBJECT_NULL);
2021 assert(mem->pageq.next == NULL &&
2022 mem->pageq.prev == NULL);
2d21ac55
A
2023 assert(mem->listq.next == NULL &&
2024 mem->listq.prev == NULL);
2025
6d2010ae 2026 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
0b4c1975
A
2027 vm_lopage_free_count < vm_lopage_free_limit &&
2028 mem->phys_page < max_valid_low_ppnum) {
0c530ab8
A
2029 /*
2030 * this exists to support hardware controllers
2031 * incapable of generating DMAs with more than 32 bits
2032 * of address on platforms with physical memory > 4G...
2033 */
2d21ac55
A
2034 queue_enter_first(&vm_lopage_queue_free,
2035 mem,
2036 vm_page_t,
2037 pageq);
0c530ab8 2038 vm_lopage_free_count++;
0b4c1975
A
2039
2040 if (vm_lopage_free_count >= vm_lopage_free_limit)
2041 vm_lopage_refill = FALSE;
2042
2043 mem->lopage = TRUE;
0c530ab8 2044 } else {
6d2010ae 2045 mem->lopage = FALSE;
0b4c1975
A
2046 mem->free = TRUE;
2047
2d21ac55
A
2048 color = mem->phys_page & vm_color_mask;
2049 queue_enter_first(&vm_page_queue_free[color],
2050 mem,
2051 vm_page_t,
2052 pageq);
0c530ab8
A
2053 vm_page_free_count++;
2054 /*
2055 * Check if we should wake up someone waiting for page.
2056 * But don't bother waking them unless they can allocate.
2057 *
2058 * We wake up only one thread, to prevent starvation.
2059 * Because the scheduling system handles wait queues FIFO,
2060 * if we wake up all waiting threads, one greedy thread
2061 * can starve multiple nice-guy threads. When the threads
2062 * all wake up, the greedy thread runs first, grabs the page,
2063 * and waits for another page. It will be the first to run
2064 * when the next page is freed.
2065 *
2066 * However, there is a slight danger here.
2067 * The thread we wake might not use the free page.
2068 * Then the other threads could wait indefinitely
2069 * while the page goes unused. To forestall this,
2070 * the pageout daemon will keep making free pages
2071 * as long as vm_page_free_wanted is non-zero.
2072 */
1c79356b 2073
b0d623f7
A
2074 assert(vm_page_free_count > 0);
2075 if (vm_page_free_wanted_privileged > 0) {
2d21ac55 2076 vm_page_free_wanted_privileged--;
b0d623f7
A
2077 need_priv_wakeup = 1;
2078 } else if (vm_page_free_wanted > 0 &&
2079 vm_page_free_count > vm_page_free_reserved) {
0c530ab8 2080 vm_page_free_wanted--;
b0d623f7 2081 need_wakeup = 1;
0c530ab8 2082 }
1c79356b 2083 }
b0d623f7
A
2084 lck_mtx_unlock(&vm_page_queue_free_lock);
2085
2086 if (need_priv_wakeup)
2087 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2088 else if (need_wakeup)
2089 thread_wakeup_one((event_t) &vm_page_free_count);
2d21ac55 2090
6d2010ae 2091 VM_CHECK_MEMORYSTATUS;
1c79356b
A
2092}
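
/*
 * Note: the wake-one policy above is paired with the accounting in
 * vm_page_wait() below -- each sleeper bumps vm_page_free_wanted (or the
 * privileged variant) exactly once before blocking, and each page released
 * here wakes at most one of them.
 */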
2093
1c79356b
A
2094/*
2095 * vm_page_wait:
2096 *
2097 * Wait for a page to become available.
2098 * If there are plenty of free pages, then we don't sleep.
2099 *
2100 * Returns:
2101 * TRUE: There may be another page, try again
2102 * FALSE: We were interrupted out of our wait, don't try again
2103 */
2104
2105boolean_t
2106vm_page_wait(
2107 int interruptible )
2108{
2109 /*
2110 * We can't use vm_page_free_reserved to make this
2111 * determination. Consider: some thread might
2112 * need to allocate two pages. The first allocation
2113 * succeeds, the second fails. After the first page is freed,
2114 * a call to vm_page_wait must really block.
2115 */
9bccf70c 2116 kern_return_t wait_result;
9bccf70c 2117 int need_wakeup = 0;
2d21ac55 2118 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
1c79356b 2119
b0d623f7 2120 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
2121
2122 if (is_privileged && vm_page_free_count) {
b0d623f7 2123 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2124 return TRUE;
2125 }
1c79356b 2126 if (vm_page_free_count < vm_page_free_target) {
2d21ac55
A
2127
2128 if (is_privileged) {
2129 if (vm_page_free_wanted_privileged++ == 0)
2130 need_wakeup = 1;
2131 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2132 } else {
2133 if (vm_page_free_wanted++ == 0)
2134 need_wakeup = 1;
2135 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2136 }
b0d623f7 2137 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 2138 counter(c_vm_page_wait_block++);
0b4e3aa0
A
2139
2140 if (need_wakeup)
2141 thread_wakeup((event_t)&vm_page_free_wanted);
9bccf70c 2142
91447636 2143 if (wait_result == THREAD_WAITING)
9bccf70c
A
2144 wait_result = thread_block(THREAD_CONTINUE_NULL);
2145
1c79356b
A
2146 return(wait_result == THREAD_AWAKENED);
2147 } else {
b0d623f7 2148 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b
A
2149 return TRUE;
2150 }
2151}
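
/*
 * Illustrative sketch (hypothetical, for exposition only): the TRUE/FALSE
 * return maps naturally onto a grab/retry loop; compare the THREAD_UNINT
 * loop in vm_page_part_zero_fill() later in this file.
 */
#if 0	/* example only -- example_grab_page_blocking() is a made-up helper */
static vm_page_t
example_grab_page_blocking(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
			return (VM_PAGE_NULL);	/* interrupted: let the caller cope */
	}
	return (m);
}
#endif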
2152
2153/*
2154 * vm_page_alloc:
2155 *
2156 * Allocate and return a memory cell associated
2157 * with this VM object/offset pair.
2158 *
2159 * Object must be locked.
2160 */
2161
2162vm_page_t
2163vm_page_alloc(
2164 vm_object_t object,
2165 vm_object_offset_t offset)
2166{
2167 register vm_page_t mem;
2168
2d21ac55 2169 vm_object_lock_assert_exclusive(object);
1c79356b
A
2170 mem = vm_page_grab();
2171 if (mem == VM_PAGE_NULL)
2172 return VM_PAGE_NULL;
2173
2174 vm_page_insert(mem, object, offset);
2175
2176 return(mem);
2177}
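
/*
 * Illustrative sketch (hypothetical, for exposition only): the object must
 * be locked exclusively across vm_page_alloc(), and the caller has to be
 * prepared for a VM_PAGE_NULL return when the free pool is depleted.
 */
#if 0	/* example only -- example_populate_one_page() is a made-up helper */
static kern_return_t
example_populate_one_page(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	vm_object_lock(object);
	m = vm_page_alloc(object, offset);
	vm_object_unlock(object);

	if (m == VM_PAGE_NULL)
		return KERN_RESOURCE_SHORTAGE;

	return KERN_SUCCESS;
}
#endif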
2178
0c530ab8
A
2179vm_page_t
2180vm_page_alloclo(
2181 vm_object_t object,
2182 vm_object_offset_t offset)
2183{
2184 register vm_page_t mem;
2185
2d21ac55 2186 vm_object_lock_assert_exclusive(object);
0c530ab8
A
2187 mem = vm_page_grablo();
2188 if (mem == VM_PAGE_NULL)
2189 return VM_PAGE_NULL;
2190
2191 vm_page_insert(mem, object, offset);
2192
2193 return(mem);
2194}
2195
2196
2d21ac55
A
2197/*
2198 * vm_page_alloc_guard:
2199 *
b0d623f7 2200 * Allocate a fictitious page which will be used
2d21ac55
A
2201 * as a guard page. The page will be inserted into
2202 * the object and returned to the caller.
2203 */
2204
2205vm_page_t
2206vm_page_alloc_guard(
2207 vm_object_t object,
2208 vm_object_offset_t offset)
2209{
2210 register vm_page_t mem;
2211
2212 vm_object_lock_assert_exclusive(object);
2213 mem = vm_page_grab_guard();
2214 if (mem == VM_PAGE_NULL)
2215 return VM_PAGE_NULL;
2216
2217 vm_page_insert(mem, object, offset);
2218
2219 return(mem);
2220}
2221
2222
1c79356b
A
2223counter(unsigned int c_laundry_pages_freed = 0;)
2224
1c79356b 2225/*
6d2010ae 2226 * vm_page_free_prepare:
1c79356b 2227 *
6d2010ae
A
2228 * Removes page from any queue it may be on
2229 * and disassociates it from its VM object.
1c79356b
A
2230 *
2231 * Object and page queues must be locked prior to entry.
2232 */
b0d623f7 2233static void
2d21ac55 2234vm_page_free_prepare(
6d2010ae 2235 vm_page_t mem)
b0d623f7
A
2236{
2237 vm_page_free_prepare_queues(mem);
2238 vm_page_free_prepare_object(mem, TRUE);
2239}
2240
2241
2242void
2243vm_page_free_prepare_queues(
2244 vm_page_t mem)
1c79356b 2245{
2d21ac55 2246 VM_PAGE_CHECK(mem);
1c79356b
A
2247 assert(!mem->free);
2248 assert(!mem->cleaning);
2d21ac55 2249#if DEBUG
b0d623f7 2250 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2251 if (mem->free)
b0d623f7 2252 panic("vm_page_free: freeing page on free list\n");
91447636 2253#endif
b0d623f7
A
2254 if (mem->object) {
2255 vm_object_lock_assert_exclusive(mem->object);
2256 }
2d21ac55
A
2257 if (mem->laundry) {
2258 /*
2259 * We may have to free a page while it's being laundered
2260 * if we lost its pager (due to a forced unmount, for example).
316670eb
A
2261 * We need to call vm_pageout_steal_laundry() before removing
2262 * the page from its VM object, so that we can remove it
2263 * from its pageout queue and adjust the laundry accounting
2d21ac55 2264 */
316670eb 2265 vm_pageout_steal_laundry(mem, TRUE);
2d21ac55
A
2266 counter(++c_laundry_pages_freed);
2267 }
316670eb 2268
b0d623f7
A
2269 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2270
2271 if (VM_PAGE_WIRED(mem)) {
2272 if (mem->object) {
2273 assert(mem->object->wired_page_count > 0);
2274 mem->object->wired_page_count--;
2275 assert(mem->object->resident_page_count >=
2276 mem->object->wired_page_count);
6d2010ae
A
2277
2278 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2279 OSAddAtomic(+1, &vm_page_purgeable_count);
2280 assert(vm_page_purgeable_wired_count > 0);
2281 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2282 }
b0d623f7 2283 }
1c79356b
A
2284 if (!mem->private && !mem->fictitious)
2285 vm_page_wire_count--;
2286 mem->wire_count = 0;
2287 assert(!mem->gobbled);
2288 } else if (mem->gobbled) {
2289 if (!mem->private && !mem->fictitious)
2290 vm_page_wire_count--;
2291 vm_page_gobble_count--;
2292 }
b0d623f7
A
2293}
2294
2295
2296void
2297vm_page_free_prepare_object(
2298 vm_page_t mem,
2299 boolean_t remove_from_hash)
2300{
b0d623f7
A
2301 if (mem->tabled)
2302 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
1c79356b 2303
b0d623f7 2304 PAGE_WAKEUP(mem); /* clears wanted */
1c79356b
A
2305
2306 if (mem->private) {
2307 mem->private = FALSE;
2308 mem->fictitious = TRUE;
55e303ae 2309 mem->phys_page = vm_page_fictitious_addr;
1c79356b 2310 }
6d2010ae 2311 if ( !mem->fictitious) {
0b4c1975 2312 vm_page_init(mem, mem->phys_page, mem->lopage);
1c79356b
A
2313 }
2314}
2315
b0d623f7 2316
6d2010ae
A
2317/*
2318 * vm_page_free:
2319 *
2320 * Returns the given page to the free list,
2321 * disassociating it from any VM object.
2322 *
2323 * Object and page queues must be locked prior to entry.
2324 */
2d21ac55
A
2325void
2326vm_page_free(
2327 vm_page_t mem)
2328{
b0d623f7 2329 vm_page_free_prepare(mem);
6d2010ae 2330
b0d623f7
A
2331 if (mem->fictitious) {
2332 vm_page_release_fictitious(mem);
2333 } else {
2334 vm_page_release(mem);
2335 }
2336}
2337
2338
2339void
2340vm_page_free_unlocked(
2341 vm_page_t mem,
2342 boolean_t remove_from_hash)
2343{
2344 vm_page_lockspin_queues();
2345 vm_page_free_prepare_queues(mem);
2346 vm_page_unlock_queues();
2347
2348 vm_page_free_prepare_object(mem, remove_from_hash);
2349
2d21ac55
A
2350 if (mem->fictitious) {
2351 vm_page_release_fictitious(mem);
2352 } else {
2353 vm_page_release(mem);
2354 }
2355}
55e303ae 2356
316670eb 2357
2d21ac55
A
2358/*
2359 * Free a list of pages. The list can be up to several hundred pages,
2360 * as blocked up by vm_pageout_scan().
b0d623f7 2361 * The big win is not having to take the free list lock once
316670eb 2362 * per page.
2d21ac55 2363 */
55e303ae
A
2364void
2365vm_page_free_list(
316670eb 2366 vm_page_t freeq,
b0d623f7 2367 boolean_t prepare_object)
55e303ae 2368{
316670eb 2369 vm_page_t mem;
2d21ac55 2370 vm_page_t nxt;
316670eb
A
2371 vm_page_t local_freeq;
2372 int pg_count;
2d21ac55 2373
316670eb 2374 while (freeq) {
55e303ae 2375
316670eb
A
2376 pg_count = 0;
2377 local_freeq = VM_PAGE_NULL;
2378 mem = freeq;
b0d623f7 2379
316670eb
A
2380 /*
2381 * break up the processing into smaller chunks so
2382 * that we can 'pipeline' the pages onto the
2383 * free list w/o introducing too much
2384 * contention on the global free queue lock
2385 */
2386 while (mem && pg_count < 64) {
2387
2388 assert(!mem->inactive);
2389 assert(!mem->active);
2390 assert(!mem->throttled);
2391 assert(!mem->free);
2392 assert(!mem->speculative);
2393 assert(!VM_PAGE_WIRED(mem));
2394 assert(mem->pageq.prev == NULL);
2395
2396 nxt = (vm_page_t)(mem->pageq.next);
b0d623f7 2397
316670eb
A
2398 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2399 assert(pmap_verify_free(mem->phys_page));
2400 }
2401 if (prepare_object == TRUE)
2402 vm_page_free_prepare_object(mem, TRUE);
b0d623f7 2403
316670eb
A
2404 if (!mem->fictitious) {
2405 assert(mem->busy);
55e303ae 2406
316670eb
A
2407 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2408 vm_lopage_free_count < vm_lopage_free_limit &&
2409 mem->phys_page < max_valid_low_ppnum) {
2410 mem->pageq.next = NULL;
2411 vm_page_release(mem);
2412 } else {
2413 /*
2414 * IMPORTANT: we can't set the page "free" here
2415 * because that would make the page eligible for
2416 * a physically-contiguous allocation (see
2417 * vm_page_find_contiguous()) right away (we don't
2418 * hold the vm_page_queue_free lock). That would
2419 * cause trouble because the page is not actually
2420 * in the free queue yet...
2421 */
2422 mem->pageq.next = (queue_entry_t)local_freeq;
2423 local_freeq = mem;
2424 pg_count++;
935ed37a 2425
316670eb 2426 pmap_clear_noencrypt(mem->phys_page);
935ed37a 2427 }
316670eb
A
2428 } else {
2429 assert(mem->phys_page == vm_page_fictitious_addr ||
2430 mem->phys_page == vm_page_guard_addr);
2431 vm_page_release_fictitious(mem);
2d21ac55 2432 }
316670eb 2433 mem = nxt;
55e303ae 2434 }
316670eb
A
2435 freeq = mem;
2436
2437 if ( (mem = local_freeq) ) {
2438 unsigned int avail_free_count;
2439 unsigned int need_wakeup = 0;
2440 unsigned int need_priv_wakeup = 0;
2d21ac55 2441
316670eb 2442 lck_mtx_lock_spin(&vm_page_queue_free_lock);
55e303ae 2443
316670eb
A
2444 while (mem) {
2445 int color;
2446
2447 nxt = (vm_page_t)(mem->pageq.next);
2d21ac55 2448
b0d623f7
A
2449 assert(!mem->free);
2450 assert(mem->busy);
2451 mem->free = TRUE;
b0d623f7 2452
316670eb
A
2453 color = mem->phys_page & vm_color_mask;
2454 queue_enter_first(&vm_page_queue_free[color],
2455 mem,
2456 vm_page_t,
2457 pageq);
2458 mem = nxt;
2d21ac55 2459 }
316670eb
A
2460 vm_page_free_count += pg_count;
2461 avail_free_count = vm_page_free_count;
2462
2463 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2464
2465 if (avail_free_count < vm_page_free_wanted_privileged) {
2466 need_priv_wakeup = avail_free_count;
2467 vm_page_free_wanted_privileged -= avail_free_count;
2468 avail_free_count = 0;
2469 } else {
2470 need_priv_wakeup = vm_page_free_wanted_privileged;
2471 avail_free_count -= vm_page_free_wanted_privileged;
2472 vm_page_free_wanted_privileged = 0;
2473 }
b0d623f7 2474 }
316670eb
A
2475 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2476 unsigned int available_pages;
55e303ae 2477
316670eb 2478 available_pages = avail_free_count - vm_page_free_reserved;
55e303ae 2479
316670eb
A
2480 if (available_pages >= vm_page_free_wanted) {
2481 need_wakeup = vm_page_free_wanted;
2482 vm_page_free_wanted = 0;
2483 } else {
2484 need_wakeup = available_pages;
2485 vm_page_free_wanted -= available_pages;
2486 }
2487 }
2488 lck_mtx_unlock(&vm_page_queue_free_lock);
55e303ae 2489
316670eb
A
2490 if (need_priv_wakeup != 0) {
2491 /*
2492 * There shouldn't be that many VM-privileged threads,
2493 * so let's wake them all up, even if we don't quite
2494 * have enough pages to satisfy them all.
2495 */
2496 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2497 }
2498 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2499 /*
2500 * We don't expect to have any more waiters
2501 * after this, so let's wake them all up at
2502 * once.
2503 */
2504 thread_wakeup((event_t) &vm_page_free_count);
2505 } else for (; need_wakeup != 0; need_wakeup--) {
2506 /*
2507 * Wake up one waiter per page we just released.
2508 */
2509 thread_wakeup_one((event_t) &vm_page_free_count);
55e303ae 2510 }
2d21ac55 2511
316670eb 2512 VM_CHECK_MEMORYSTATUS;
b0d623f7 2513 }
55e303ae
A
2514 }
2515}
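
/*
 * Worked example for the wakeup accounting above (hypothetical numbers):
 * suppose a batch frees pg_count = 64 pages while 2 privileged and 100
 * ordinary waiters are queued.  All privileged waiters are woken in one
 * broadcast; the ordinary waiters are then offered only
 * (avail_free_count - vm_page_free_reserved) pages, so if that comes to,
 * say, 40, exactly 40 thread_wakeup_one() calls are issued and
 * vm_page_free_wanted keeps the remaining 60 for the next batch.
 */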
2516
2517
1c79356b
A
2518/*
2519 * vm_page_wire:
2520 *
2521 * Mark this page as wired down by yet
2522 * another map, removing it from paging queues
2523 * as necessary.
2524 *
2525 * The page's object and the page queues must be locked.
2526 */
2527void
2528vm_page_wire(
2529 register vm_page_t mem)
2530{
2531
91447636 2532// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
1c79356b
A
2533
2534 VM_PAGE_CHECK(mem);
b0d623f7
A
2535 if (mem->object) {
2536 vm_object_lock_assert_exclusive(mem->object);
2537 } else {
2538 /*
2539 * In theory, the page should be in an object before it
2540 * gets wired, since we need to hold the object lock
2541 * to update some fields in the page structure.
2542 * However, some code (i386 pmap, for example) might want
2543 * to wire a page before it gets inserted into an object.
2544 * That's somewhat OK, as long as nobody else can get to
2545 * that page and update it at the same time.
2546 */
2547 }
91447636 2548#if DEBUG
b0d623f7 2549 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2550#endif
b0d623f7 2551 if ( !VM_PAGE_WIRED(mem)) {
316670eb
A
2552
2553 if (mem->pageout_queue) {
2554 mem->pageout = FALSE;
2555 vm_pageout_throttle_up(mem);
2556 }
1c79356b 2557 VM_PAGE_QUEUES_REMOVE(mem);
b0d623f7
A
2558
2559 if (mem->object) {
2560 mem->object->wired_page_count++;
2561 assert(mem->object->resident_page_count >=
2562 mem->object->wired_page_count);
2563 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2564 assert(vm_page_purgeable_count > 0);
2565 OSAddAtomic(-1, &vm_page_purgeable_count);
2566 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2567 }
2568 if (mem->object->all_reusable) {
2569 /*
2570 * Wired pages are not counted as "re-usable"
2571 * in "all_reusable" VM objects, so nothing
2572 * to do here.
2573 */
2574 } else if (mem->reusable) {
2575 /*
2576 * This page is not "re-usable" when it's
2577 * wired, so adjust its state and the
2578 * accounting.
2579 */
2580 vm_object_reuse_pages(mem->object,
2581 mem->offset,
2582 mem->offset+PAGE_SIZE_64,
2583 FALSE);
2584 }
2585 }
2586 assert(!mem->reusable);
2587
1c79356b
A
2588 if (!mem->private && !mem->fictitious && !mem->gobbled)
2589 vm_page_wire_count++;
2590 if (mem->gobbled)
2591 vm_page_gobble_count--;
2592 mem->gobbled = FALSE;
593a1d5f 2593
6d2010ae
A
2594 VM_CHECK_MEMORYSTATUS;
2595
91447636
A
2596 /*
2597 * ENCRYPTED SWAP:
2598 * The page could be encrypted, but
2599 * We don't have to decrypt it here
2600 * because we don't guarantee that the
2601 * data is actually valid at this point.
2602 * The page will get decrypted in
2603 * vm_fault_wire() if needed.
2604 */
1c79356b
A
2605 }
2606 assert(!mem->gobbled);
2607 mem->wire_count++;
b0d623f7 2608 VM_PAGE_CHECK(mem);
1c79356b
A
2609}
2610
2611/*
2612 * vm_page_gobble:
2613 *
2614 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2615 *
2616 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2617 */
2618void
2619vm_page_gobble(
2620 register vm_page_t mem)
2621{
2d21ac55 2622 vm_page_lockspin_queues();
1c79356b
A
2623 VM_PAGE_CHECK(mem);
2624
2625 assert(!mem->gobbled);
b0d623f7 2626 assert( !VM_PAGE_WIRED(mem));
1c79356b 2627
b0d623f7 2628 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
1c79356b
A
2629 if (!mem->private && !mem->fictitious)
2630 vm_page_wire_count++;
2631 }
2632 vm_page_gobble_count++;
2633 mem->gobbled = TRUE;
2634 vm_page_unlock_queues();
2635}
2636
2637/*
2638 * vm_page_unwire:
2639 *
2640 * Release one wiring of this page, potentially
2641 * enabling it to be paged again.
2642 *
2643 * The page's object and the page queues must be locked.
2644 */
2645void
2646vm_page_unwire(
0b4c1975
A
2647 vm_page_t mem,
2648 boolean_t queueit)
1c79356b
A
2649{
2650
91447636 2651// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
1c79356b
A
2652
2653 VM_PAGE_CHECK(mem);
b0d623f7
A
2654 assert(VM_PAGE_WIRED(mem));
2655 assert(mem->object != VM_OBJECT_NULL);
91447636 2656#if DEBUG
b0d623f7
A
2657 vm_object_lock_assert_exclusive(mem->object);
2658 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2659#endif
1c79356b
A
2660 if (--mem->wire_count == 0) {
2661 assert(!mem->private && !mem->fictitious);
2662 vm_page_wire_count--;
b0d623f7
A
2663 assert(mem->object->wired_page_count > 0);
2664 mem->object->wired_page_count--;
2665 assert(mem->object->resident_page_count >=
2666 mem->object->wired_page_count);
2667 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2668 OSAddAtomic(+1, &vm_page_purgeable_count);
2669 assert(vm_page_purgeable_wired_count > 0);
2670 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2671 }
91447636
A
2672 assert(!mem->laundry);
2673 assert(mem->object != kernel_object);
2674 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
0b4c1975
A
2675
2676 if (queueit == TRUE) {
2677 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2678 vm_page_deactivate(mem);
2679 } else {
2680 vm_page_activate(mem);
2681 }
2d21ac55 2682 }
593a1d5f 2683
6d2010ae
A
2684 VM_CHECK_MEMORYSTATUS;
2685
1c79356b 2686 }
b0d623f7 2687 VM_PAGE_CHECK(mem);
1c79356b
A
2688}
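
/*
 * Illustrative sketch (hypothetical, for exposition only): wire counts nest,
 * so every vm_page_wire() must eventually be balanced by a vm_page_unwire(),
 * and both calls are made with the page's object and the page queues locked.
 */
#if 0	/* example only -- example_wire_briefly() is a made-up helper */
static void
example_wire_briefly(vm_page_t m)
{
	vm_object_lock(m->object);	/* object lock, then page queues lock */
	vm_page_lockspin_queues();
	vm_page_wire(m);		/* wire_count 0 -> 1: leaves the paging queues */
	vm_page_unlock_queues();
	vm_object_unlock(m->object);

	/* ... use the page while it cannot be paged out ... */

	vm_object_lock(m->object);
	vm_page_lockspin_queues();
	vm_page_unwire(m, TRUE);	/* wire_count 1 -> 0: requeued for paging */
	vm_page_unlock_queues();
	vm_object_unlock(m->object);
}
#endif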
2689
2690/*
2691 * vm_page_deactivate:
2692 *
2693 * Returns the given page to the inactive list,
2694 * indicating that no physical maps have access
2695 * to this page. [Used by the physical mapping system.]
2696 *
2697 * The page queues must be locked.
2698 */
2699void
2700vm_page_deactivate(
b0d623f7
A
2701 vm_page_t m)
2702{
2703 vm_page_deactivate_internal(m, TRUE);
2704}
2705
2706
2707void
2708vm_page_deactivate_internal(
2709 vm_page_t m,
2710 boolean_t clear_hw_reference)
1c79356b 2711{
2d21ac55 2712
1c79356b 2713 VM_PAGE_CHECK(m);
91447636 2714 assert(m->object != kernel_object);
2d21ac55 2715 assert(m->phys_page != vm_page_guard_addr);
1c79356b 2716
55e303ae 2717// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
91447636 2718#if DEBUG
b0d623f7 2719 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2720#endif
1c79356b
A
2721 /*
2722 * This page is no longer very interesting. If it was
2723 * interesting (active or inactive/referenced), then we
2724 * clear the reference bit and (re)enter it in the
2725 * inactive queue. Note wired pages should not have
2726 * their reference bit cleared.
2727 */
6d2010ae 2728 assert ( !(m->absent && !m->unusual));
0b4c1975 2729
1c79356b 2730 if (m->gobbled) { /* can this happen? */
b0d623f7 2731 assert( !VM_PAGE_WIRED(m));
2d21ac55 2732
1c79356b
A
2733 if (!m->private && !m->fictitious)
2734 vm_page_wire_count--;
2735 vm_page_gobble_count--;
2736 m->gobbled = FALSE;
2737 }
316670eb
A
2738 /*
2739 * if this page is currently on the pageout queue, we can't do the
2740 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2741 * and we can't remove it manually since we would need the object lock
2742 * (which is not required here) to decrement the activity_in_progress
2743 * reference which is held on the object while the page is in the pageout queue...
2744 * just let the normal laundry processing proceed
2745 */
2746 if (m->pageout_queue || m->private || m->fictitious || (VM_PAGE_WIRED(m)))
1c79356b 2747 return;
2d21ac55 2748
6d2010ae 2749 if (!m->absent && clear_hw_reference == TRUE)
2d21ac55
A
2750 pmap_clear_reference(m->phys_page);
2751
2752 m->reference = FALSE;
2d21ac55
A
2753 m->no_cache = FALSE;
2754
2755 if (!m->inactive) {
2756 VM_PAGE_QUEUES_REMOVE(m);
0b4e3aa0 2757
6d2010ae 2758 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
d1ecb069
A
2759 m->dirty && m->object->internal &&
2760 (m->object->purgable == VM_PURGABLE_DENY ||
2761 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2762 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2d21ac55
A
2763 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2764 m->throttled = TRUE;
2765 vm_page_throttled_count++;
9bccf70c 2766 } else {
6d2010ae 2767 if (m->object->named && m->object->ref_count == 1) {
2d21ac55 2768 vm_page_speculate(m, FALSE);
b0d623f7 2769#if DEVELOPMENT || DEBUG
2d21ac55 2770 vm_page_speculative_recreated++;
b0d623f7 2771#endif
2d21ac55 2772 } else {
6d2010ae 2773 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2d21ac55 2774 }
9bccf70c 2775 }
1c79356b
A
2776 }
2777}
2778
316670eb
A
2779/*
2780 * vm_page_enqueue_cleaned
2781 *
2782 * Put the page on the cleaned queue, mark it cleaned, etc.
2783 * Being on the cleaned queue (and having m->clean_queue set)
2784 * does ** NOT ** guarantee that the page is clean!
2785 *
2786 * Call with the queues lock held.
2787 */
2788
2789void vm_page_enqueue_cleaned(vm_page_t m)
2790{
2791 assert(m->phys_page != vm_page_guard_addr);
2792#if DEBUG
2793 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2794#endif
2795 assert( !(m->absent && !m->unusual));
2796
2797 if (m->gobbled) {
2798 assert( !VM_PAGE_WIRED(m));
2799 if (!m->private && !m->fictitious)
2800 vm_page_wire_count--;
2801 vm_page_gobble_count--;
2802 m->gobbled = FALSE;
2803 }
2804 /*
2805 * if this page is currently on the pageout queue, we can't do the
2806 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2807 * and we can't remove it manually since we would need the object lock
2808 * (which is not required here) to decrement the activity_in_progress
2809 * reference which is held on the object while the page is in the pageout queue...
2810 * just let the normal laundry processing proceed
2811 */
2812 if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
2813 return;
2814
2815 VM_PAGE_QUEUES_REMOVE(m);
2816
2817 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
2818 m->clean_queue = TRUE;
2819 vm_page_cleaned_count++;
2820
2821 m->inactive = TRUE;
2822 vm_page_inactive_count++;
2823
2824 vm_pageout_enqueued_cleaned++;
2825}
2826
1c79356b
A
2827/*
2828 * vm_page_activate:
2829 *
2830 * Put the specified page on the active list (if appropriate).
2831 *
2832 * The page queues must be locked.
2833 */
2834
2835void
2836vm_page_activate(
2837 register vm_page_t m)
2838{
2839 VM_PAGE_CHECK(m);
2d21ac55 2840#ifdef FIXME_4778297
91447636 2841 assert(m->object != kernel_object);
2d21ac55
A
2842#endif
2843 assert(m->phys_page != vm_page_guard_addr);
91447636 2844#if DEBUG
b0d623f7 2845 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2846#endif
6d2010ae 2847 assert( !(m->absent && !m->unusual));
0b4c1975 2848
1c79356b 2849 if (m->gobbled) {
b0d623f7 2850 assert( !VM_PAGE_WIRED(m));
1c79356b
A
2851 if (!m->private && !m->fictitious)
2852 vm_page_wire_count--;
2853 vm_page_gobble_count--;
2854 m->gobbled = FALSE;
2855 }
316670eb
A
2856 /*
2857 * if this page is currently on the pageout queue, we can't do the
2858 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2859 * and we can't remove it manually since we would need the object lock
2860 * (which is not required here) to decrement the activity_in_progress
2861 * reference which is held on the object while the page is in the pageout queue...
2862 * just let the normal laundry processing proceed
2863 */
2864 if (m->pageout_queue || m->private || m->fictitious)
1c79356b
A
2865 return;
2866
2d21ac55
A
2867#if DEBUG
2868 if (m->active)
2869 panic("vm_page_activate: already active");
2870#endif
2871
2872 if (m->speculative) {
2873 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2874 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2875 }
316670eb 2876
2d21ac55
A
2877 VM_PAGE_QUEUES_REMOVE(m);
2878
b0d623f7 2879 if ( !VM_PAGE_WIRED(m)) {
316670eb 2880
6d2010ae
A
2881 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2882 m->dirty && m->object->internal &&
d1ecb069
A
2883 (m->object->purgable == VM_PURGABLE_DENY ||
2884 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2885 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2d21ac55
A
2886 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2887 m->throttled = TRUE;
2888 vm_page_throttled_count++;
9bccf70c 2889 } else {
2d21ac55
A
2890 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2891 m->active = TRUE;
6d2010ae 2892 vm_page_active_count++;
9bccf70c 2893 }
2d21ac55
A
2894 m->reference = TRUE;
2895 m->no_cache = FALSE;
1c79356b 2896 }
b0d623f7 2897 VM_PAGE_CHECK(m);
2d21ac55
A
2898}
2899
2900
2901/*
2902 * vm_page_speculate:
2903 *
2904 * Put the specified page on the speculative list (if appropriate).
2905 *
2906 * The page queues must be locked.
2907 */
2908void
2909vm_page_speculate(
2910 vm_page_t m,
2911 boolean_t new)
2912{
2913 struct vm_speculative_age_q *aq;
2914
2915 VM_PAGE_CHECK(m);
2916 assert(m->object != kernel_object);
2d21ac55 2917 assert(m->phys_page != vm_page_guard_addr);
91447636 2918#if DEBUG
b0d623f7 2919 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2920#endif
6d2010ae 2921 assert( !(m->absent && !m->unusual));
b0d623f7 2922
316670eb
A
2923 /*
2924 * if this page is currently on the pageout queue, we can't do the
2925 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2926 * and we can't remove it manually since we would need the object lock
2927 * (which is not required here) to decrement the activity_in_progress
2928 * reference which is held on the object while the page is in the pageout queue...
2929 * just let the normal laundry processing proceed
2930 */
2931 if (m->pageout_queue || m->private || m->fictitious)
6d2010ae 2932 return;
0b4c1975 2933
b0d623f7
A
2934 VM_PAGE_QUEUES_REMOVE(m);
2935
2936 if ( !VM_PAGE_WIRED(m)) {
2d21ac55 2937 mach_timespec_t ts;
b0d623f7
A
2938 clock_sec_t sec;
2939 clock_nsec_t nsec;
2d21ac55 2940
b0d623f7
A
2941 clock_get_system_nanotime(&sec, &nsec);
2942 ts.tv_sec = (unsigned int) sec;
2943 ts.tv_nsec = nsec;
2d21ac55
A
2944
2945 if (vm_page_speculative_count == 0) {
2946
2947 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2948 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2949
2950 aq = &vm_page_queue_speculative[speculative_age_index];
2951
2952 /*
2953 * set the timer to begin a new group
2954 */
6d2010ae
A
2955 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2956 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2d21ac55
A
2957
2958 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2959 } else {
2960 aq = &vm_page_queue_speculative[speculative_age_index];
2961
2962 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2963
2964 speculative_age_index++;
2965
2966 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2967 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2968 if (speculative_age_index == speculative_steal_index) {
2969 speculative_steal_index = speculative_age_index + 1;
2970
2971 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2972 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2973 }
2974 aq = &vm_page_queue_speculative[speculative_age_index];
2975
2976 if (!queue_empty(&aq->age_q))
2977 vm_page_speculate_ageit(aq);
2978
6d2010ae
A
2979 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2980 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2d21ac55
A
2981
2982 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2983 }
2984 }
2985 enqueue_tail(&aq->age_q, &m->pageq);
2986 m->speculative = TRUE;
2987 vm_page_speculative_count++;
2988
2989 if (new == TRUE) {
6d2010ae
A
2990 vm_object_lock_assert_exclusive(m->object);
2991
2d21ac55 2992 m->object->pages_created++;
b0d623f7 2993#if DEVELOPMENT || DEBUG
2d21ac55 2994 vm_page_speculative_created++;
b0d623f7 2995#endif
2d21ac55
A
2996 }
2997 }
b0d623f7 2998 VM_PAGE_CHECK(m);
2d21ac55
A
2999}
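
/*
 * Worked example for the age_ts computation above (hypothetical value): if
 * vm_page_speculative_q_age_ms were 500, each new bin would get
 * age_ts = { .tv_sec = 500 / 1000 = 0,
 *            .tv_nsec = (500 % 1000) * 1000 * NSEC_PER_USEC = 500000000 },
 * i.e. half a second is added to "now" before the next aging bin opens.
 */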
3000
3001
3002/*
3003 * move pages from the specified aging bin to
3004 * the speculative bin that pageout_scan claims from
3005 *
3006 * The page queues must be locked.
3007 */
3008void
3009vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3010{
3011 struct vm_speculative_age_q *sq;
3012 vm_page_t t;
3013
3014 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3015
3016 if (queue_empty(&sq->age_q)) {
3017 sq->age_q.next = aq->age_q.next;
3018 sq->age_q.prev = aq->age_q.prev;
3019
3020 t = (vm_page_t)sq->age_q.next;
3021 t->pageq.prev = &sq->age_q;
3022
3023 t = (vm_page_t)sq->age_q.prev;
3024 t->pageq.next = &sq->age_q;
3025 } else {
3026 t = (vm_page_t)sq->age_q.prev;
3027 t->pageq.next = aq->age_q.next;
3028
3029 t = (vm_page_t)aq->age_q.next;
3030 t->pageq.prev = sq->age_q.prev;
3031
3032 t = (vm_page_t)aq->age_q.prev;
3033 t->pageq.next = &sq->age_q;
3034
3035 sq->age_q.prev = aq->age_q.prev;
1c79356b 3036 }
2d21ac55
A
3037 queue_init(&aq->age_q);
3038}
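
/*
 * In effect the code above performs an O(1) splice: the whole aging bin 'aq'
 * is appended to the tail of the AGED bin 'sq' by rewiring the queue heads
 * and the adjoining boundary elements, and 'aq' is then re-initialized as
 * an empty queue.
 */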
3039
3040
3041void
3042vm_page_lru(
3043 vm_page_t m)
3044{
3045 VM_PAGE_CHECK(m);
3046 assert(m->object != kernel_object);
3047 assert(m->phys_page != vm_page_guard_addr);
3048
3049#if DEBUG
b0d623f7 3050 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 3051#endif
316670eb
A
3052 /*
3053 * if this page is currently on the pageout queue, we can't do the
3054 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3055 * and we can't remove it manually since we would need the object lock
3056 * (which is not required here) to decrement the activity_in_progress
3057 * reference which is held on the object while the page is in the pageout queue...
3058 * just let the normal laundry processing proceed
3059 */
3060 if (m->pageout_queue || m->private || (VM_PAGE_WIRED(m)))
2d21ac55
A
3061 return;
3062
3063 m->no_cache = FALSE;
3064
3065 VM_PAGE_QUEUES_REMOVE(m);
3066
6d2010ae 3067 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
1c79356b
A
3068}
3069
2d21ac55 3070
b0d623f7
A
3071void
3072vm_page_reactivate_all_throttled(void)
3073{
3074 vm_page_t first_throttled, last_throttled;
3075 vm_page_t first_active;
3076 vm_page_t m;
3077 int extra_active_count;
3078
6d2010ae
A
3079 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3080 return;
3081
b0d623f7
A
3082 extra_active_count = 0;
3083 vm_page_lock_queues();
3084 if (! queue_empty(&vm_page_queue_throttled)) {
3085 /*
3086 * Switch "throttled" pages to "active".
3087 */
3088 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3089 VM_PAGE_CHECK(m);
3090 assert(m->throttled);
3091 assert(!m->active);
3092 assert(!m->inactive);
3093 assert(!m->speculative);
3094 assert(!VM_PAGE_WIRED(m));
6d2010ae
A
3095
3096 extra_active_count++;
3097
b0d623f7
A
3098 m->throttled = FALSE;
3099 m->active = TRUE;
3100 VM_PAGE_CHECK(m);
3101 }
3102
3103 /*
3104 * Transfer the entire throttled queue to the regular LRU page queues.
3105 * We insert it at the head of the active queue, so that these pages
3106 * get re-evaluated by the LRU algorithm first, since they've been
3107 * completely out of it until now.
3108 */
3109 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3110 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3111 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3112 if (queue_empty(&vm_page_queue_active)) {
3113 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3114 } else {
3115 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3116 }
3117 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3118 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3119 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3120
3121#if DEBUG
3122 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3123#endif
3124 queue_init(&vm_page_queue_throttled);
3125 /*
3126 * Adjust the global page counts.
3127 */
3128 vm_page_active_count += extra_active_count;
3129 vm_page_throttled_count = 0;
3130 }
3131 assert(vm_page_throttled_count == 0);
3132 assert(queue_empty(&vm_page_queue_throttled));
3133 vm_page_unlock_queues();
3134}
3135
3136
3137/*
3138 * move pages from the indicated local queue to the global active queue
3139 * it's OK to fail if we're below the hard limit and force == FALSE
3140 * the nolocks == TRUE case is to allow this function to be run on
3141 * the hibernate path
3142 */
3143
3144void
3145vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3146{
3147 struct vpl *lq;
3148 vm_page_t first_local, last_local;
3149 vm_page_t first_active;
3150 vm_page_t m;
3151 uint32_t count = 0;
3152
3153 if (vm_page_local_q == NULL)
3154 return;
3155
3156 lq = &vm_page_local_q[lid].vpl_un.vpl;
3157
3158 if (nolocks == FALSE) {
3159 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3160 if ( !vm_page_trylockspin_queues())
3161 return;
3162 } else
3163 vm_page_lockspin_queues();
3164
3165 VPL_LOCK(&lq->vpl_lock);
3166 }
3167 if (lq->vpl_count) {
3168 /*
3169 * Switch "local" pages to "active".
3170 */
3171 assert(!queue_empty(&lq->vpl_queue));
3172
3173 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3174 VM_PAGE_CHECK(m);
3175 assert(m->local);
3176 assert(!m->active);
3177 assert(!m->inactive);
3178 assert(!m->speculative);
3179 assert(!VM_PAGE_WIRED(m));
3180 assert(!m->throttled);
3181 assert(!m->fictitious);
3182
3183 if (m->local_id != lid)
3184 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3185
3186 m->local_id = 0;
3187 m->local = FALSE;
3188 m->active = TRUE;
3189 VM_PAGE_CHECK(m);
3190
3191 count++;
3192 }
3193 if (count != lq->vpl_count)
3194 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3195
3196 /*
3197 * Transfer the entire local queue to the regular LRU page queues.
3198 */
3199 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3200 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3201 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3202
3203 if (queue_empty(&vm_page_queue_active)) {
3204 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3205 } else {
3206 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3207 }
3208 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3209 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3210 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3211
3212 queue_init(&lq->vpl_queue);
3213 /*
3214 * Adjust the global page counts.
3215 */
3216 vm_page_active_count += lq->vpl_count;
3217 lq->vpl_count = 0;
3218 }
3219 assert(queue_empty(&lq->vpl_queue));
3220
3221 if (nolocks == FALSE) {
3222 VPL_UNLOCK(&lq->vpl_lock);
3223 vm_page_unlock_queues();
3224 }
3225}
3226
1c79356b
A
3227/*
3228 * vm_page_part_zero_fill:
3229 *
3230 * Zero-fill a part of the page.
3231 */
3232void
3233vm_page_part_zero_fill(
3234 vm_page_t m,
3235 vm_offset_t m_pa,
3236 vm_size_t len)
3237{
3238 vm_page_t tmp;
3239
316670eb
A
3240#if 0
3241 /*
3242 * we don't hold the page queue lock
3243 * so this check isn't safe to make
3244 */
1c79356b 3245 VM_PAGE_CHECK(m);
316670eb
A
3246#endif
3247
1c79356b 3248#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
55e303ae 3249 pmap_zero_part_page(m->phys_page, m_pa, len);
1c79356b
A
3250#else
3251 while (1) {
3252 tmp = vm_page_grab();
3253 if (tmp == VM_PAGE_NULL) {
3254 vm_page_wait(THREAD_UNINT);
3255 continue;
3256 }
3257 break;
3258 }
3259 vm_page_zero_fill(tmp);
3260 if(m_pa != 0) {
3261 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3262 }
3263 if((m_pa + len) < PAGE_SIZE) {
3264 vm_page_part_copy(m, m_pa + len, tmp,
3265 m_pa + len, PAGE_SIZE - (m_pa + len));
3266 }
3267 vm_page_copy(tmp,m);
b0d623f7 3268 VM_PAGE_FREE(tmp);
1c79356b
A
3269#endif
3270
3271}
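
/*
 * Worked example for the fallback path above (hypothetical values): with
 * m_pa = 0x200 and len = 0x100 on a 4K page, the temporary page is zeroed,
 * bytes [0, 0x200) and [0x300, 0x1000) are copied over from 'm', and the
 * temporary page is then copied back wholesale -- leaving exactly
 * [0x200, 0x300) zero-filled in 'm'.
 */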
3272
3273/*
3274 * vm_page_zero_fill:
3275 *
3276 * Zero-fill the specified page.
3277 */
3278void
3279vm_page_zero_fill(
3280 vm_page_t m)
3281{
3282 XPR(XPR_VM_PAGE,
3283 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7 3284 m->object, m->offset, m, 0,0);
316670eb
A
3285#if 0
3286 /*
3287 * we don't hold the page queue lock
3288 * so this check isn't safe to make
3289 */
1c79356b 3290 VM_PAGE_CHECK(m);
316670eb 3291#endif
1c79356b 3292
55e303ae
A
3293// dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3294 pmap_zero_page(m->phys_page);
1c79356b
A
3295}
3296
3297/*
3298 * vm_page_part_copy:
3299 *
3300 * copy part of one page to another
3301 */
3302
3303void
3304vm_page_part_copy(
3305 vm_page_t src_m,
3306 vm_offset_t src_pa,
3307 vm_page_t dst_m,
3308 vm_offset_t dst_pa,
3309 vm_size_t len)
3310{
316670eb
A
3311#if 0
3312 /*
3313 * we don't hold the page queue lock
3314 * so this check isn't safe to make
3315 */
1c79356b
A
3316 VM_PAGE_CHECK(src_m);
3317 VM_PAGE_CHECK(dst_m);
316670eb 3318#endif
55e303ae
A
3319 pmap_copy_part_page(src_m->phys_page, src_pa,
3320 dst_m->phys_page, dst_pa, len);
1c79356b
A
3321}
3322
3323/*
3324 * vm_page_copy:
3325 *
3326 * Copy one page to another
91447636
A
3327 *
3328 * ENCRYPTED SWAP:
3329 * The source page should not be encrypted. The caller should
3330 * make sure the page is decrypted first, if necessary.
1c79356b
A
3331 */
3332
2d21ac55
A
3333int vm_page_copy_cs_validations = 0;
3334int vm_page_copy_cs_tainted = 0;
3335
1c79356b
A
3336void
3337vm_page_copy(
3338 vm_page_t src_m,
3339 vm_page_t dest_m)
3340{
3341 XPR(XPR_VM_PAGE,
3342 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
b0d623f7
A
3343 src_m->object, src_m->offset,
3344 dest_m->object, dest_m->offset,
1c79356b 3345 0);
316670eb
A
3346#if 0
3347 /*
3348 * we don't hold the page queue lock
3349 * so this check isn't safe to make
3350 */
1c79356b
A
3351 VM_PAGE_CHECK(src_m);
3352 VM_PAGE_CHECK(dest_m);
316670eb
A
3353#endif
3354 vm_object_lock_assert_held(src_m->object);
1c79356b 3355
91447636
A
3356 /*
3357 * ENCRYPTED SWAP:
3358 * The source page should not be encrypted at this point.
3359 * The destination page will therefore not contain encrypted
3360 * data after the copy.
3361 */
3362 if (src_m->encrypted) {
3363 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3364 }
3365 dest_m->encrypted = FALSE;
3366
2d21ac55 3367 if (src_m->object != VM_OBJECT_NULL &&
4a3eedf9 3368 src_m->object->code_signed) {
2d21ac55 3369 /*
4a3eedf9 3370 * We're copying a page from a code-signed object.
2d21ac55
A
3371 * Whoever ends up mapping the copy page might care about
3372 * the original page's integrity, so let's validate the
3373 * source page now.
3374 */
3375 vm_page_copy_cs_validations++;
3376 vm_page_validate_cs(src_m);
3377 }
6d2010ae
A
3378
3379 if (vm_page_is_slideable(src_m)) {
3380 boolean_t was_busy = src_m->busy;
3381 src_m->busy = TRUE;
3382 (void) vm_page_slide(src_m, 0);
3383 assert(src_m->busy);
316670eb 3384 if (!was_busy) {
6d2010ae
A
3385 PAGE_WAKEUP_DONE(src_m);
3386 }
3387 }
3388
2d21ac55 3389 /*
b0d623f7
A
3390 * Propagate the cs_tainted bit to the copy page. Do not propagate
3391 * the cs_validated bit.
2d21ac55 3392 */
2d21ac55
A
3393 dest_m->cs_tainted = src_m->cs_tainted;
3394 if (dest_m->cs_tainted) {
2d21ac55
A
3395 vm_page_copy_cs_tainted++;
3396 }
6d2010ae
A
3397 dest_m->slid = src_m->slid;
3398 dest_m->error = src_m->error; /* sliding src_m might have failed... */
55e303ae 3399 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
1c79356b
A
3400}
3401
2d21ac55 3402#if MACH_ASSERT
b0d623f7
A
3403static void
3404_vm_page_print(
3405 vm_page_t p)
3406{
3407 printf("vm_page %p: \n", p);
3408 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3409 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3410 printf(" next=%p\n", p->next);
3411 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3412 printf(" wire_count=%u\n", p->wire_count);
3413
3414 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3415 (p->local ? "" : "!"),
3416 (p->inactive ? "" : "!"),
3417 (p->active ? "" : "!"),
3418 (p->pageout_queue ? "" : "!"),
3419 (p->speculative ? "" : "!"),
3420 (p->laundry ? "" : "!"));
3421 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3422 (p->free ? "" : "!"),
3423 (p->reference ? "" : "!"),
3424 (p->gobbled ? "" : "!"),
3425 (p->private ? "" : "!"),
3426 (p->throttled ? "" : "!"));
3427 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3428 (p->busy ? "" : "!"),
3429 (p->wanted ? "" : "!"),
3430 (p->tabled ? "" : "!"),
3431 (p->fictitious ? "" : "!"),
3432 (p->pmapped ? "" : "!"),
3433 (p->wpmapped ? "" : "!"));
3434 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3435 (p->pageout ? "" : "!"),
3436 (p->absent ? "" : "!"),
3437 (p->error ? "" : "!"),
3438 (p->dirty ? "" : "!"),
3439 (p->cleaning ? "" : "!"),
3440 (p->precious ? "" : "!"),
3441 (p->clustered ? "" : "!"));
3442 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3443 (p->overwriting ? "" : "!"),
3444 (p->restart ? "" : "!"),
3445 (p->unusual ? "" : "!"),
3446 (p->encrypted ? "" : "!"),
3447 (p->encrypted_cleaning ? "" : "!"));
316670eb 3448 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
b0d623f7
A
3449 (p->cs_validated ? "" : "!"),
3450 (p->cs_tainted ? "" : "!"),
3451 (p->no_cache ? "" : "!"));
b0d623f7
A
3452
3453 printf("phys_page=0x%x\n", p->phys_page);
3454}
3455
1c79356b
A
3456/*
3457 * Check that the list of pages is ordered by
3458 * ascending physical address and has no holes.
3459 */
2d21ac55 3460static int
1c79356b
A
3461vm_page_verify_contiguous(
3462 vm_page_t pages,
3463 unsigned int npages)
3464{
3465 register vm_page_t m;
3466 unsigned int page_count;
91447636 3467 vm_offset_t prev_addr;
1c79356b 3468
55e303ae 3469 prev_addr = pages->phys_page;
1c79356b
A
3470 page_count = 1;
3471 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
55e303ae 3472 if (m->phys_page != prev_addr + 1) {
b0d623f7
A
3473 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3474 m, (long)prev_addr, m->phys_page);
6d2010ae 3475 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
1c79356b
A
3476 panic("vm_page_verify_contiguous: not contiguous!");
3477 }
55e303ae 3478 prev_addr = m->phys_page;
1c79356b
A
3479 ++page_count;
3480 }
3481 if (page_count != npages) {
2d21ac55 3482 printf("pages %p actual count 0x%x but requested 0x%x\n",
1c79356b
A
3483 pages, page_count, npages);
3484 panic("vm_page_verify_contiguous: count error");
3485 }
3486 return 1;
3487}
1c79356b
A
3488
3489
2d21ac55
A
3490/*
3491 * Check the free lists for proper length etc.
3492 */
b0d623f7
A
3493static unsigned int
3494vm_page_verify_free_list(
d1ecb069 3495 queue_head_t *vm_page_queue,
b0d623f7
A
3496 unsigned int color,
3497 vm_page_t look_for_page,
3498 boolean_t expect_page)
3499{
3500 unsigned int npages;
3501 vm_page_t m;
3502 vm_page_t prev_m;
3503 boolean_t found_page;
3504
3505 found_page = FALSE;
3506 npages = 0;
d1ecb069
A
3507 prev_m = (vm_page_t) vm_page_queue;
3508 queue_iterate(vm_page_queue,
b0d623f7
A
3509 m,
3510 vm_page_t,
3511 pageq) {
6d2010ae 3512
b0d623f7
A
3513 if (m == look_for_page) {
3514 found_page = TRUE;
3515 }
3516 if ((vm_page_t) m->pageq.prev != prev_m)
3517 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3518 color, npages, m, m->pageq.prev, prev_m);
b0d623f7
A
3519 if ( ! m->busy )
3520 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3521 color, npages, m);
6d2010ae
A
3522 if (color != (unsigned int) -1) {
3523 if ((m->phys_page & vm_color_mask) != color)
3524 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3525 color, npages, m, m->phys_page & vm_color_mask, color);
3526 if ( ! m->free )
3527 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3528 color, npages, m);
3529 }
b0d623f7
A
3530 ++npages;
3531 prev_m = m;
3532 }
3533 if (look_for_page != VM_PAGE_NULL) {
3534 unsigned int other_color;
3535
3536 if (expect_page && !found_page) {
3537 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3538 color, npages, look_for_page, look_for_page->phys_page);
3539 _vm_page_print(look_for_page);
3540 for (other_color = 0;
3541 other_color < vm_colors;
3542 other_color++) {
3543 if (other_color == color)
3544 continue;
d1ecb069 3545 vm_page_verify_free_list(&vm_page_queue_free[other_color],
6d2010ae 3546 other_color, look_for_page, FALSE);
b0d623f7 3547 }
6d2010ae 3548 if (color == (unsigned int) -1) {
d1ecb069
A
3549 vm_page_verify_free_list(&vm_lopage_queue_free,
3550 (unsigned int) -1, look_for_page, FALSE);
3551 }
b0d623f7
A
3552 panic("vm_page_verify_free_list(color=%u)\n", color);
3553 }
3554 if (!expect_page && found_page) {
3555 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3556 color, npages, look_for_page, look_for_page->phys_page);
3557 }
3558 }
3559 return npages;
3560}
3561
3562static boolean_t vm_page_verify_free_lists_enabled = FALSE;
2d21ac55
A
3563static void
3564vm_page_verify_free_lists( void )
3565{
d1ecb069 3566 unsigned int color, npages, nlopages;
b0d623f7
A
3567
3568 if (! vm_page_verify_free_lists_enabled)
3569 return;
3570
2d21ac55 3571 npages = 0;
b0d623f7
A
3572
3573 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
3574
3575 for( color = 0; color < vm_colors; color++ ) {
d1ecb069 3576 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
6d2010ae 3577 color, VM_PAGE_NULL, FALSE);
2d21ac55 3578 }
d1ecb069
A
3579 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3580 (unsigned int) -1,
3581 VM_PAGE_NULL, FALSE);
3582 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3583 panic("vm_page_verify_free_lists: "
3584 "npages %u free_count %d nlopages %u lo_free_count %u",
3585 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
6d2010ae 3586
b0d623f7 3587 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 3588}
2d21ac55 3589
b0d623f7
A
3590void
3591vm_page_queues_assert(
3592 vm_page_t mem,
3593 int val)
3594{
316670eb
A
3595#if DEBUG
3596 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3597#endif
b0d623f7
A
3598 if (mem->free + mem->active + mem->inactive + mem->speculative +
3599 mem->throttled + mem->pageout_queue > (val)) {
3600 _vm_page_print(mem);
3601 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3602 }
3603 if (VM_PAGE_WIRED(mem)) {
3604 assert(!mem->active);
3605 assert(!mem->inactive);
3606 assert(!mem->speculative);
3607 assert(!mem->throttled);
316670eb 3608 assert(!mem->pageout_queue);
b0d623f7
A
3609 }
3610}
3611#endif /* MACH_ASSERT */
2d21ac55 3612
91447636 3613
1c79356b 3614/*
2d21ac55 3615 * CONTIGUOUS PAGE ALLOCATION
2d21ac55
A
3616 *
3617 * Find a region large enough to contain at least n pages
1c79356b
A
3618 * of contiguous physical memory.
3619 *
2d21ac55
A
3620 * This is done by traversing the vm_page_t array in a linear fashion
3621 * we assume that the vm_page_t array has the available physical pages in an
3622 * ordered, ascending list... this is currently true of all our implementations
3623 * and must remain so... there can be 'holes' in the array... we also can
3624 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3625 * which used to happen via 'vm_page_convert'... that function was no longer
3626 * being called and was removed...
3627 *
3628 * The basic flow consists of stabilizing some of the interesting state of
3629 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3630 * sweep at the beginning of the array looking for pages that meet our criteria
3631 * for a 'stealable' page... currently we are pretty conservative... if the page
3632 * meets these criteria and is physically contiguous to the previous page in the 'run'
3633 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3634 * and start to develop a new run... if at this point we've already considered
3635 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3636 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3637 * to other threads trying to acquire free pages (or move pages from q to q),
3638 * and then continue from the spot we left off... we only make 1 pass through the
3639 * array. Once we have a 'run' that is long enough, we'll go into the loop
3640 * which steals the pages from the queues they're currently on... pages on the free
3641 * queue can be stolen directly... pages that are on any of the other queues
3642 * must be removed from the object they are tabled on... this requires taking the
3643 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3644 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3645 * dump the pages we've currently stolen back to the free list, and pick up our
3646 * scan from the point where we aborted the 'current' run.
3647 *
3648 *
1c79356b 3649 * Requirements:
2d21ac55 3650 * - neither vm_page_queue nor vm_free_list lock can be held on entry
1c79356b 3651 *
2d21ac55 3652 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
1c79356b 3653 *
e5568f75 3654 * Algorithm:
1c79356b 3655 */
2d21ac55
A
3656
3657#define MAX_CONSIDERED_BEFORE_YIELD 1000
3658
3659
3660#define RESET_STATE_OF_RUN() \
3661 MACRO_BEGIN \
3662 prevcontaddr = -2; \
b0d623f7 3663 start_pnum = -1; \
2d21ac55
A
3664 free_considered = 0; \
3665 substitute_needed = 0; \
3666 npages = 0; \
3667 MACRO_END
3668
b0d623f7
A
3669/*
3670 * Can we steal in-use (i.e. not free) pages when searching for
3671 * physically-contiguous pages ?
3672 */
3673#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3674
3675static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3676#if DEBUG
3677int vm_page_find_contig_debug = 0;
3678#endif
2d21ac55 3679
1c79356b
A
3680static vm_page_t
3681vm_page_find_contiguous(
2d21ac55
A
3682 unsigned int contig_pages,
3683 ppnum_t max_pnum,
b0d623f7
A
3684 ppnum_t pnum_mask,
3685 boolean_t wire,
3686 int flags)
1c79356b 3687{
2d21ac55 3688 vm_page_t m = NULL;
e5568f75 3689 ppnum_t prevcontaddr;
b0d623f7
A
3690 ppnum_t start_pnum;
3691 unsigned int npages, considered, scanned;
3692 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3693 unsigned int idx_last_contig_page_found = 0;
2d21ac55
A
3694 int free_considered, free_available;
3695 int substitute_needed;
b0d623f7 3696 boolean_t wrapped;
593a1d5f 3697#if DEBUG
b0d623f7
A
3698 clock_sec_t tv_start_sec, tv_end_sec;
3699 clock_usec_t tv_start_usec, tv_end_usec;
593a1d5f
A
3700#endif
3701#if MACH_ASSERT
2d21ac55
A
3702 int yielded = 0;
3703 int dumped_run = 0;
3704 int stolen_pages = 0;
91447636 3705#endif
1c79356b 3706
2d21ac55 3707 if (contig_pages == 0)
1c79356b
A
3708 return VM_PAGE_NULL;
3709
2d21ac55
A
3710#if MACH_ASSERT
3711 vm_page_verify_free_lists();
593a1d5f
A
3712#endif
3713#if DEBUG
2d21ac55
A
3714 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3715#endif
3716 vm_page_lock_queues();
b0d623f7 3717 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
3718
3719 RESET_STATE_OF_RUN();
1c79356b 3720
b0d623f7 3721 scanned = 0;
2d21ac55
A
3722 considered = 0;
3723 free_available = vm_page_free_count - vm_page_free_reserved;
e5568f75 3724
b0d623f7
A
3725 wrapped = FALSE;
3726
3727 if(flags & KMA_LOMEM)
3728 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3729 else
3730 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3731
3732 orig_last_idx = idx_last_contig_page_found;
3733 last_idx = orig_last_idx;
3734
3735 for (page_idx = last_idx, start_idx = last_idx;
2d21ac55
A
3736 npages < contig_pages && page_idx < vm_pages_count;
3737 page_idx++) {
b0d623f7
A
3738retry:
3739 if (wrapped &&
3740 npages == 0 &&
3741 page_idx >= orig_last_idx) {
3742 /*
3743 * We're back where we started and we haven't
3744 * found any suitable contiguous range. Let's
3745 * give up.
3746 */
3747 break;
3748 }
3749 scanned++;
2d21ac55 3750 m = &vm_pages[page_idx];
e5568f75 3751
b0d623f7
A
3752 assert(!m->fictitious);
3753 assert(!m->private);
3754
2d21ac55
A
3755 if (max_pnum && m->phys_page > max_pnum) {
3756 /* no more low pages... */
3757 break;
e5568f75 3758 }
6d2010ae 3759 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
b0d623f7
A
3760 /*
3761 * not aligned
3762 */
3763 RESET_STATE_OF_RUN();
3764
3765 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
2d21ac55
A
3766 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3767 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
316670eb 3768 m->cleaning || m->overwriting || m->restart || m->unusual || m->pageout) {
2d21ac55
A
3769 /*
3770 * page is in a transient state
3771 * or a state we don't want to deal
3772 * with, so don't consider it which
3773 * means starting a new run
3774 */
3775 RESET_STATE_OF_RUN();
1c79356b 3776
2d21ac55
A
3777 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3778 /*
3779 * page needs to be on one of our queues
3780 * in order for it to be stable behind the
3781 * locks we hold at this point...
3782 * if not, don't consider it which
3783 * means starting a new run
3784 */
3785 RESET_STATE_OF_RUN();
3786
3787 } else if (!m->free && (!m->tabled || m->busy)) {
3788 /*
3789 * pages on the free list are always 'busy'
3790 * so we couldn't test for 'busy' in the check
3791 * for the transient states... pages that are
3792 * 'free' are never 'tabled', so we also couldn't
3793 * test for 'tabled'. So we check here to make
3794 * sure that a non-free page is not busy and is
3795 * tabled on an object...
3796 * if not, don't consider it which
3797 * means starting a new run
3798 */
3799 RESET_STATE_OF_RUN();
3800
3801 } else {
3802 if (m->phys_page != prevcontaddr + 1) {
b0d623f7
A
3803 if ((m->phys_page & pnum_mask) != 0) {
3804 RESET_STATE_OF_RUN();
3805 goto did_consider;
3806 } else {
3807 npages = 1;
3808 start_idx = page_idx;
3809 start_pnum = m->phys_page;
3810 }
2d21ac55
A
3811 } else {
3812 npages++;
e5568f75 3813 }
2d21ac55 3814 prevcontaddr = m->phys_page;
b0d623f7
A
3815
3816 VM_PAGE_CHECK(m);
2d21ac55
A
3817 if (m->free) {
3818 free_considered++;
b0d623f7
A
3819 } else {
3820 /*
3821 * This page is not free.
3822 * If we can't steal used pages,
3823 * we have to give up this run
3824 * and keep looking.
3825 * Otherwise, we might need to
3826 * move the contents of this page
3827 * into a substitute page.
3828 */
3829#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3830 if (m->pmapped || m->dirty) {
3831 substitute_needed++;
3832 }
3833#else
3834 RESET_STATE_OF_RUN();
3835#endif
2d21ac55 3836 }
b0d623f7 3837
2d21ac55
A
3838 if ((free_considered + substitute_needed) > free_available) {
3839 /*
3840 * if we let this run continue
3841 * we will end up dropping the vm_page_free_count
3842 * below the reserve limit... we need to abort
3843 * this run, but we can at least re-consider this
3844 * page... thus the jump back to 'retry'
3845 */
3846 RESET_STATE_OF_RUN();
3847
3848 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3849 considered++;
3850 goto retry;
e5568f75 3851 }
2d21ac55
A
3852 /*
3853 * free_available == 0
3854 * so can't consider any free pages... if
3855 * we went to retry in this case, we'd
3856 * get stuck looking at the same page
3857 * w/o making any forward progress
3858 * we also want to take this path if we've already
3859 * reached our limit that controls the lock latency
3860 */
e5568f75 3861 }
2d21ac55 3862 }
b0d623f7 3863did_consider:
2d21ac55
A
3864 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3865
b0d623f7 3866 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 3867 vm_page_unlock_queues();
e5568f75 3868
2d21ac55
A
3869 mutex_pause(0);
3870
3871 vm_page_lock_queues();
b0d623f7 3872 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
3873
3874 RESET_STATE_OF_RUN();
1c79356b 3875 /*
2d21ac55
A
3876 * reset our free page limit since we
3877 * dropped the lock protecting the vm_page_free_queue
1c79356b 3878 */
2d21ac55
A
3879 free_available = vm_page_free_count - vm_page_free_reserved;
3880 considered = 0;
3881#if MACH_ASSERT
3882 yielded++;
3883#endif
3884 goto retry;
3885 }
3886 considered++;
3887 }
3888 m = VM_PAGE_NULL;
3889
b0d623f7
A
3890 if (npages != contig_pages) {
3891 if (!wrapped) {
3892 /*
3893 * We didn't find a contiguous range but we didn't
3894 * start from the very first page.
3895 * Start again from the very first page.
3896 */
3897 RESET_STATE_OF_RUN();
3898 if( flags & KMA_LOMEM)
3899 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3900 else
3901 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3902 last_idx = 0;
3903 page_idx = last_idx;
3904 wrapped = TRUE;
3905 goto retry;
3906 }
3907 lck_mtx_unlock(&vm_page_queue_free_lock);
3908 } else {
2d21ac55
A
3909 vm_page_t m1;
3910 vm_page_t m2;
3911 unsigned int cur_idx;
3912 unsigned int tmp_start_idx;
3913 vm_object_t locked_object = VM_OBJECT_NULL;
3914 boolean_t abort_run = FALSE;
3915
b0d623f7
A
3916 assert(page_idx - start_idx == contig_pages);
3917
2d21ac55
A
3918 tmp_start_idx = start_idx;
3919
3920 /*
3921 * first pass through to pull the free pages
3922 * off of the free queue so that in case we
3923 * need substitute pages, we won't grab any
3924 * of the free pages in the run... we'll clear
3925 * the 'free' bit in the 2nd pass, and even in
3926 * an abort_run case, we'll collect all of the
3927 * free pages in this run and return them to the free list
3928 */
3929 while (start_idx < page_idx) {
3930
3931 m1 = &vm_pages[start_idx++];
3932
b0d623f7
A
3933#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3934 assert(m1->free);
3935#endif
3936
2d21ac55 3937 if (m1->free) {
0b4c1975 3938 unsigned int color;
2d21ac55 3939
0b4c1975 3940 color = m1->phys_page & vm_color_mask;
b0d623f7 3941#if MACH_ASSERT
6d2010ae 3942 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
b0d623f7 3943#endif
0b4c1975
A
3944 queue_remove(&vm_page_queue_free[color],
3945 m1,
3946 vm_page_t,
3947 pageq);
d1ecb069
A
3948 m1->pageq.next = NULL;
3949 m1->pageq.prev = NULL;
0b4c1975 3950#if MACH_ASSERT
6d2010ae 3951 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
0b4c1975 3952#endif
b0d623f7
A
3953 /*
3954 * Clear the "free" bit so that this page
3955 * does not get considered for another
3956 * concurrent physically-contiguous allocation.
3957 */
3958 m1->free = FALSE;
3959 assert(m1->busy);
0b4c1975
A
3960
3961 vm_page_free_count--;
2d21ac55
A
3962 }
3963 }
3964 /*
3965 * adjust global freelist counts
3966 */
3967 if (vm_page_free_count < vm_page_free_count_minimum)
3968 vm_page_free_count_minimum = vm_page_free_count;
3969
b0d623f7
A
3970 if( flags & KMA_LOMEM)
3971 vm_page_lomem_find_contiguous_last_idx = page_idx;
3972 else
3973 vm_page_find_contiguous_last_idx = page_idx;
3974
2d21ac55
A
3975 /*
3976 * we can drop the free queue lock at this point since
3977 * we've pulled any 'free' candidates off of the list
3978 * we need it dropped so that we can do a vm_page_grab
3979 * when substituting for pmapped/dirty pages
3980 */
b0d623f7 3981 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
3982
3983 start_idx = tmp_start_idx;
3984 cur_idx = page_idx - 1;
3985
3986 while (start_idx++ < page_idx) {
3987 /*
3988 * must go through the list from back to front
3989 * so that the page list is created in the
3990 * correct order - low -> high phys addresses
3991 */
3992 m1 = &vm_pages[cur_idx--];
3993
b0d623f7
A
3994 assert(!m1->free);
3995 if (m1->object == VM_OBJECT_NULL) {
2d21ac55 3996 /*
b0d623f7 3997 * page has already been removed from
2d21ac55
A
3998 * the free list in the 1st pass
3999 */
b0d623f7 4000 assert(m1->offset == (vm_object_offset_t) -1);
2d21ac55
A
4001 assert(m1->busy);
4002 assert(!m1->wanted);
4003 assert(!m1->laundry);
e5568f75 4004 } else {
2d21ac55
A
4005 vm_object_t object;
4006
4007 if (abort_run == TRUE)
4008 continue;
4009
4010 object = m1->object;
4011
4012 if (object != locked_object) {
4013 if (locked_object) {
4014 vm_object_unlock(locked_object);
4015 locked_object = VM_OBJECT_NULL;
4016 }
4017 if (vm_object_lock_try(object))
4018 locked_object = object;
4019 }
4020 if (locked_object == VM_OBJECT_NULL ||
b0d623f7 4021 (VM_PAGE_WIRED(m1) || m1->gobbled ||
2d21ac55
A
4022 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
4023 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
316670eb 4024 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->busy)) {
2d21ac55
A
4025
4026 if (locked_object) {
4027 vm_object_unlock(locked_object);
4028 locked_object = VM_OBJECT_NULL;
4029 }
4030 tmp_start_idx = cur_idx;
4031 abort_run = TRUE;
4032 continue;
4033 }
4034 if (m1->pmapped || m1->dirty) {
4035 int refmod;
4036 vm_object_offset_t offset;
4037
4038 m2 = vm_page_grab();
4039
4040 if (m2 == VM_PAGE_NULL) {
4041 if (locked_object) {
4042 vm_object_unlock(locked_object);
4043 locked_object = VM_OBJECT_NULL;
4044 }
4045 tmp_start_idx = cur_idx;
4046 abort_run = TRUE;
4047 continue;
4048 }
4049 if (m1->pmapped)
4050 refmod = pmap_disconnect(m1->phys_page);
4051 else
4052 refmod = 0;
4053 vm_page_copy(m1, m2);
4054
4055 m2->reference = m1->reference;
4056 m2->dirty = m1->dirty;
4057
4058 if (refmod & VM_MEM_REFERENCED)
4059 m2->reference = TRUE;
316670eb
A
4060 if (refmod & VM_MEM_MODIFIED) {
4061 SET_PAGE_DIRTY(m2, TRUE);
4062 }
2d21ac55
A
4063 offset = m1->offset;
4064
4065 /*
4066 * completely cleans up the state
4067 * of the page so that it is ready
4068 * to be put onto the free list, or
4069 * for this purpose it looks like it
4070 * just came off of the free list
4071 */
4072 vm_page_free_prepare(m1);
4073
4074 /*
4075 * make sure we clear the ref/mod state
4076 * from the pmap layer... else we risk
4077 * inheriting state from the last time
4078 * this page was used...
4079 */
4080 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4081 /*
4082 * now put the substitute page on the object
4083 */
316670eb 4084 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
2d21ac55
A
4085
4086 if (m2->reference)
4087 vm_page_activate(m2);
4088 else
4089 vm_page_deactivate(m2);
4090
4091 PAGE_WAKEUP_DONE(m2);
4092
4093 } else {
4094 /*
4095 * completely cleans up the state
4096 * of the page so that it is ready
4097 * to be put onto the free list, or
4098 * for this purpose it looks like it
4099 * just came off of the free list
4100 */
4101 vm_page_free_prepare(m1);
4102 }
4103#if MACH_ASSERT
4104 stolen_pages++;
4105#endif
1c79356b 4106 }
2d21ac55
A
4107 m1->pageq.next = (queue_entry_t) m;
4108 m1->pageq.prev = NULL;
4109 m = m1;
e5568f75 4110 }
2d21ac55
A
4111 if (locked_object) {
4112 vm_object_unlock(locked_object);
4113 locked_object = VM_OBJECT_NULL;
1c79356b
A
4114 }
4115
2d21ac55
A
4116 if (abort_run == TRUE) {
4117 if (m != VM_PAGE_NULL) {
b0d623f7 4118 vm_page_free_list(m, FALSE);
2d21ac55
A
4119 }
4120#if MACH_ASSERT
4121 dumped_run++;
4122#endif
4123 /*
4124 * want the index of the last
4125 * page in this run that was
4126 * successfully 'stolen', so back
4127 * it up 1 for the auto-decrement on use
4128 * and 1 more to bump back over this page
4129 */
4130 page_idx = tmp_start_idx + 2;
b0d623f7
A
4131 if (page_idx >= vm_pages_count) {
4132 if (wrapped)
4133 goto done_scanning;
4134 page_idx = last_idx = 0;
4135 wrapped = TRUE;
4136 }
4137 abort_run = FALSE;
4138
2d21ac55 4139 /*
b0d623f7
A
4140 * We didn't find a contiguous range but we didn't
4141 * start from the very first page.
4142 * Start again from the very first page.
2d21ac55 4143 */
b0d623f7
A
4144 RESET_STATE_OF_RUN();
4145
4146 if( flags & KMA_LOMEM)
4147 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4148 else
4149 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4150
4151 last_idx = page_idx;
2d21ac55 4152
b0d623f7
A
4153 lck_mtx_lock(&vm_page_queue_free_lock);
4154 /*
4155 * reset our free page limit since we
4156 * dropped the lock protecting the vm_page_free_queue
4157 */
4158 free_available = vm_page_free_count - vm_page_free_reserved;
2d21ac55
A
4159 goto retry;
4160 }
e5568f75 4161
e5568f75 4162 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
2d21ac55
A
4163
4164 if (wire == TRUE)
4165 m1->wire_count++;
4166 else
4167 m1->gobbled = TRUE;
e5568f75 4168 }
2d21ac55
A
4169 if (wire == FALSE)
4170 vm_page_gobble_count += npages;
4171
4172 /*
4173 * gobbled pages are also counted as wired pages
4174 */
e5568f75 4175 vm_page_wire_count += npages;
e5568f75 4176
2d21ac55
A
4177 assert(vm_page_verify_contiguous(m, npages));
4178 }
4179done_scanning:
4180 vm_page_unlock_queues();
4181
593a1d5f 4182#if DEBUG
2d21ac55
A
4183 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4184
4185 tv_end_sec -= tv_start_sec;
4186 if (tv_end_usec < tv_start_usec) {
4187 tv_end_sec--;
4188 tv_end_usec += 1000000;
1c79356b 4189 }
2d21ac55
A
4190 tv_end_usec -= tv_start_usec;
4191 if (tv_end_usec >= 1000000) {
4192 tv_end_sec++;
4193 tv_end_usec -= 1000000;
4194 }
b0d623f7
A
4195 if (vm_page_find_contig_debug) {
4196 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4197 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4198 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4199 scanned, yielded, dumped_run, stolen_pages);
4200 }
e5568f75 4201
593a1d5f
A
4202#endif
4203#if MACH_ASSERT
2d21ac55
A
4204 vm_page_verify_free_lists();
4205#endif
e5568f75 4206 return m;
1c79356b
A
4207}
4208
4209/*
4210 * Allocate a list of contiguous, wired pages.
4211 */
4212kern_return_t
4213cpm_allocate(
4214 vm_size_t size,
4215 vm_page_t *list,
2d21ac55 4216 ppnum_t max_pnum,
b0d623f7
A
4217 ppnum_t pnum_mask,
4218 boolean_t wire,
4219 int flags)
1c79356b 4220{
91447636
A
4221 vm_page_t pages;
4222 unsigned int npages;
1c79356b 4223
6d2010ae 4224 if (size % PAGE_SIZE != 0)
1c79356b
A
4225 return KERN_INVALID_ARGUMENT;
4226
b0d623f7
A
4227 npages = (unsigned int) (size / PAGE_SIZE);
4228 if (npages != size / PAGE_SIZE) {
4229 /* 32-bit overflow */
4230 return KERN_INVALID_ARGUMENT;
4231 }
1c79356b 4232
1c79356b
A
4233 /*
4234 * Obtain a pointer to a subset of the free
4235 * list large enough to satisfy the request;
4236 * the region will be physically contiguous.
4237 */
b0d623f7 4238 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
e5568f75 4239
2d21ac55 4240 if (pages == VM_PAGE_NULL)
1c79356b 4241 return KERN_NO_SPACE;
1c79356b 4242 /*
2d21ac55 4243 * determine need for wakeups
1c79356b 4244 */
2d21ac55 4245 if ((vm_page_free_count < vm_page_free_min) ||
316670eb
A
4246 ((vm_page_free_count < vm_page_free_target) &&
4247 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4248 thread_wakeup((event_t) &vm_page_free_wanted);
2d21ac55 4249
6d2010ae
A
4250 VM_CHECK_MEMORYSTATUS;
4251
1c79356b
A
4252 /*
4253 * The CPM pages should now be available and
4254 * ordered by ascending physical address.
4255 */
4256 assert(vm_page_verify_contiguous(pages, npages));
4257
4258 *list = pages;
4259 return KERN_SUCCESS;
4260}
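/*
 * Editor's illustrative sketch (not part of the original file): how a
 * caller might use cpm_allocate() to obtain 16 wired, physically
 * contiguous pages and then walk the returned list with NEXT_PAGE().
 * The function name 'example_cpm_user' and the choice of arguments
 * (no max_pnum limit, no alignment mask, no flags) are assumptions
 * made purely for the example.
 */
#if 0	/* illustration only -- never compiled */
static kern_return_t
example_cpm_user(void)
{
	vm_page_t	pages, m;
	unsigned int	n = 0;
	kern_return_t	kr;

	kr = cpm_allocate(16 * PAGE_SIZE,	/* size: must be a multiple of PAGE_SIZE */
			  &pages,		/* out: list ordered by ascending phys addr */
			  0,			/* max_pnum: 0 == no upper bound on phys page */
			  0,			/* pnum_mask: no alignment requirement */
			  TRUE,			/* wire the pages */
			  0);			/* flags: e.g. KMA_LOMEM for low memory */
	if (kr != KERN_SUCCESS)
		return kr;			/* KERN_NO_SPACE if no contiguous run was found */

	for (m = pages; m != VM_PAGE_NULL; m = NEXT_PAGE(m))
		n++;				/* each page in the list is wired */
	assert(n == 16);

	return KERN_SUCCESS;
}
#endif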
6d2010ae
A
4261
4262
4263unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4264
4265/*
4266 * when working on a 'run' of pages, it is necessary to hold
4267 * the vm_page_queue_lock (a hot global lock) for certain operations
4268 * on the page... however, the majority of the work can be done
4269 * while merely holding the object lock... in fact there are certain
4270 * collections of pages that don't require any work brokered by the
4271 * vm_page_queue_lock... to mitigate the time spent behind the global
4272 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4273 * while doing all of the work that doesn't require the vm_page_queue_lock...
4274 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4275 * necessary work for each page... we will grab the busy bit on the page
4276 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4277 * if it can't immediately take the vm_page_queue_lock in order to compete
4278 * for the locks in the same order that vm_pageout_scan takes them.
4279 * the operation names are modeled after the names of the routines that
4280 * need to be called in order to make the changes very obvious in the
4281 * original loop
4282 */
4283
4284void
4285vm_page_do_delayed_work(
4286 vm_object_t object,
4287 struct vm_page_delayed_work *dwp,
4288 int dw_count)
4289{
4290 int j;
4291 vm_page_t m;
4292 vm_page_t local_free_q = VM_PAGE_NULL;
6d2010ae
A
4293
4294 /*
4295 * pageout_scan takes the vm_page_lock_queues first
4296 * then tries for the object lock... to avoid what
4297 * is effectively a lock inversion, we'll go to the
4298 * trouble of taking them in that same order... otherwise
4299 * if this object contains the majority of the pages resident
4300 * in the UBC (or a small set of large objects actively being
4301 * worked on contain the majority of the pages), we could
4302 * cause the pageout_scan thread to 'starve' in its attempt
4303 * to find pages to move to the free queue, since it has to
4304 * successfully acquire the object lock of any candidate page
4305 * before it can steal/clean it.
4306 */
4307 if (!vm_page_trylockspin_queues()) {
4308 vm_object_unlock(object);
4309
4310 vm_page_lockspin_queues();
4311
4312 for (j = 0; ; j++) {
4313 if (!vm_object_lock_avoid(object) &&
4314 _vm_object_lock_try(object))
4315 break;
4316 vm_page_unlock_queues();
4317 mutex_pause(j);
4318 vm_page_lockspin_queues();
4319 }
6d2010ae
A
4320 }
4321 for (j = 0; j < dw_count; j++, dwp++) {
4322
4323 m = dwp->dw_m;
4324
6d2010ae
A
4325 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4326 vm_pageout_throttle_up(m);
4327
4328 if (dwp->dw_mask & DW_vm_page_wire)
4329 vm_page_wire(m);
4330 else if (dwp->dw_mask & DW_vm_page_unwire) {
4331 boolean_t queueit;
4332
4333 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4334
4335 vm_page_unwire(m, queueit);
4336 }
4337 if (dwp->dw_mask & DW_vm_page_free) {
4338 vm_page_free_prepare_queues(m);
4339
4340 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4341 /*
4342 * Add this page to our list of reclaimed pages,
4343 * to be freed later.
4344 */
4345 m->pageq.next = (queue_entry_t) local_free_q;
4346 local_free_q = m;
4347 } else {
4348 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4349 vm_page_deactivate_internal(m, FALSE);
4350 else if (dwp->dw_mask & DW_vm_page_activate) {
4351 if (m->active == FALSE) {
4352 vm_page_activate(m);
4353 }
4354 }
4355 else if (dwp->dw_mask & DW_vm_page_speculate)
4356 vm_page_speculate(m, TRUE);
316670eb
A
4357 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4358 /*
4359 * if we didn't hold the object lock and did this,
4360 * we might disconnect the page, then someone might
4361 * soft fault it back in, then we would put it on the
4362 * cleaned queue, and so we would have a referenced (maybe even dirty)
4363 * page on that queue, which we don't want
4364 */
4365 int refmod_state = pmap_disconnect(m->phys_page);
4366
4367 if ((refmod_state & VM_MEM_REFERENCED)) {
4368 /*
4369 * this page has been touched since it got cleaned; let's activate it
4370 * if it hasn't already been
4371 */
4372 vm_pageout_enqueued_cleaned++;
4373 vm_pageout_cleaned_reactivated++;
4374 vm_pageout_cleaned_commit_reactivated++;
4375
4376 if (m->active == FALSE)
4377 vm_page_activate(m);
4378 } else {
4379 m->reference = FALSE;
4380 vm_page_enqueue_cleaned(m);
4381 }
4382 }
6d2010ae
A
4383 else if (dwp->dw_mask & DW_vm_page_lru)
4384 vm_page_lru(m);
316670eb
A
4385 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4386 if ( !m->pageout_queue)
4387 VM_PAGE_QUEUES_REMOVE(m);
4388 }
6d2010ae
A
4389 if (dwp->dw_mask & DW_set_reference)
4390 m->reference = TRUE;
4391 else if (dwp->dw_mask & DW_clear_reference)
4392 m->reference = FALSE;
4393
4394 if (dwp->dw_mask & DW_move_page) {
316670eb
A
4395 if ( !m->pageout_queue) {
4396 VM_PAGE_QUEUES_REMOVE(m);
6d2010ae 4397
316670eb 4398 assert(m->object != kernel_object);
6d2010ae 4399
316670eb
A
4400 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4401 }
6d2010ae
A
4402 }
4403 if (dwp->dw_mask & DW_clear_busy)
4404 m->busy = FALSE;
4405
4406 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4407 PAGE_WAKEUP(m);
4408 }
4409 }
4410 vm_page_unlock_queues();
4411
4412 if (local_free_q)
4413 vm_page_free_list(local_free_q, TRUE);
4414
4415 VM_CHECK_MEMORYSTATUS;
4416
4417}
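/*
 * Editor's illustrative sketch (not part of the original file): the
 * caller-side pattern described in the comment before
 * vm_page_do_delayed_work().  Work is batched into an on-stack array of
 * vm_page_delayed_work entries while only the object lock is held, then
 * vm_page_do_delayed_work() is called once per batch to take the (hot)
 * vm_page_queue_lock and apply the queued operations.  The function name
 * 'example_batch_free', the fixed batch size of 64 and the choice of
 * DW_vm_page_free as the operation are assumptions made for the example;
 * real callers pick dw_mask bits based on the state of each page.
 */
#if 0	/* illustration only -- never compiled */
#define EXAMPLE_DW_LIMIT	64

static void
example_batch_free(vm_object_t object, vm_page_t *page_array, int count)
{
	struct vm_page_delayed_work	dw_array[EXAMPLE_DW_LIMIT];
	struct vm_page_delayed_work	*dwp;
	int				dw_count = 0;
	int				i;

	vm_object_lock(object);

	dwp = &dw_array[0];

	for (i = 0; i < count; i++) {
		vm_page_t m = page_array[i];

		/*
		 * object-lock-only work happens here; real callers typically
		 * also have the page busy before queueing DW_vm_page_free
		 */
		dwp->dw_m = m;
		dwp->dw_mask = DW_vm_page_free;		/* or DW_vm_page_activate, etc. */
		dwp++;
		dw_count++;

		if (dw_count >= EXAMPLE_DW_LIMIT) {
			/* one trip behind the vm_page_queue_lock for the whole batch */
			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);

	vm_object_unlock(object);
}
#endif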
4418
0b4c1975
A
4419kern_return_t
4420vm_page_alloc_list(
4421 int page_count,
4422 int flags,
4423 vm_page_t *list)
4424{
4425 vm_page_t lo_page_list = VM_PAGE_NULL;
4426 vm_page_t mem;
4427 int i;
4428
4429 if ( !(flags & KMA_LOMEM))
4430 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4431
4432 for (i = 0; i < page_count; i++) {
4433
4434 mem = vm_page_grablo();
4435
4436 if (mem == VM_PAGE_NULL) {
4437 if (lo_page_list)
4438 vm_page_free_list(lo_page_list, FALSE);
4439
4440 *list = VM_PAGE_NULL;
4441
4442 return (KERN_RESOURCE_SHORTAGE);
4443 }
4444 mem->pageq.next = (queue_entry_t) lo_page_list;
4445 lo_page_list = mem;
4446 }
4447 *list = lo_page_list;
4448
4449 return (KERN_SUCCESS);
4450}
4451
4452void
4453vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4454{
4455 page->offset = offset;
4456}
4457
4458vm_page_t
4459vm_page_get_next(vm_page_t page)
4460{
4461 return ((vm_page_t) page->pageq.next);
4462}
4463
4464vm_object_offset_t
4465vm_page_get_offset(vm_page_t page)
4466{
4467 return (page->offset);
4468}
4469
4470ppnum_t
4471vm_page_get_phys_page(vm_page_t page)
4472{
4473 return (page->phys_page);
4474}
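/*
 * Editor's illustrative sketch (not part of the original file): the
 * accessors above presumably exist so that callers outside the VM can
 * walk a page list without knowing the vm_page_t layout.  The sketch
 * allocates a low-memory page list with vm_page_alloc_list() and walks
 * it with vm_page_get_next() / vm_page_get_phys_page().  The name
 * 'example_walk_lo_pages' is an assumption made for the example.
 */
#if 0	/* illustration only -- never compiled */
static kern_return_t
example_walk_lo_pages(int page_count)
{
	vm_page_t	list, m;
	kern_return_t	kr;

	kr = vm_page_alloc_list(page_count, KMA_LOMEM, &list);
	if (kr != KERN_SUCCESS)
		return kr;			/* KERN_RESOURCE_SHORTAGE if a grab failed */

	for (m = list; m != VM_PAGE_NULL; m = vm_page_get_next(m)) {
		ppnum_t pnum = vm_page_get_phys_page(m);

		/* each page came from vm_page_grablo(), so pnum is a "low" page */
		(void) pnum;
	}
	vm_page_free_list(list, FALSE);		/* give the pages back when done */
	return KERN_SUCCESS;
}
#endif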
4475
4476
b0d623f7
A
4477/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4478
d1ecb069
A
4479#if HIBERNATION
4480
b0d623f7
A
4481static vm_page_t hibernate_gobble_queue;
4482
0b4c1975
A
4483extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4484
4485static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4486static int hibernate_flush_dirty_pages(void);
4487static int hibernate_flush_queue(queue_head_t *, int);
0b4c1975
A
4488
4489void hibernate_flush_wait(void);
4490void hibernate_mark_in_progress(void);
4491void hibernate_clear_in_progress(void);
4492
4493
4494struct hibernate_statistics {
4495 int hibernate_considered;
4496 int hibernate_reentered_on_q;
4497 int hibernate_found_dirty;
4498 int hibernate_skipped_cleaning;
4499 int hibernate_skipped_transient;
4500 int hibernate_skipped_precious;
4501 int hibernate_queue_nolock;
4502 int hibernate_queue_paused;
4503 int hibernate_throttled;
4504 int hibernate_throttle_timeout;
4505 int hibernate_drained;
4506 int hibernate_drain_timeout;
4507 int cd_lock_failed;
4508 int cd_found_precious;
4509 int cd_found_wired;
4510 int cd_found_busy;
4511 int cd_found_unusual;
4512 int cd_found_cleaning;
4513 int cd_found_laundry;
4514 int cd_found_dirty;
4515 int cd_local_free;
4516 int cd_total_free;
4517 int cd_vm_page_wire_count;
4518 int cd_pages;
4519 int cd_discarded;
4520 int cd_count_wire;
4521} hibernate_stats;
4522
4523
4524
4525static int
4526hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4527{
4528 wait_result_t wait_result;
4529
4530 vm_page_lock_queues();
4531
4532 while (q->pgo_laundry) {
4533
4534 q->pgo_draining = TRUE;
4535
4536 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4537
4538 vm_page_unlock_queues();
4539
4540 wait_result = thread_block(THREAD_CONTINUE_NULL);
4541
4542 if (wait_result == THREAD_TIMED_OUT) {
4543 hibernate_stats.hibernate_drain_timeout++;
4544 return (1);
4545 }
4546 vm_page_lock_queues();
4547
4548 hibernate_stats.hibernate_drained++;
4549 }
4550 vm_page_unlock_queues();
4551
4552 return (0);
4553}
4554
0b4c1975
A
4555
4556static int
4557hibernate_flush_queue(queue_head_t *q, int qcount)
4558{
4559 vm_page_t m;
4560 vm_object_t l_object = NULL;
4561 vm_object_t m_object = NULL;
4562 int refmod_state = 0;
4563 int try_failed_count = 0;
4564 int retval = 0;
4565 int current_run = 0;
4566 struct vm_pageout_queue *iq;
4567 struct vm_pageout_queue *eq;
4568 struct vm_pageout_queue *tq;
4569
316670eb 4570 hibernate_cleaning_in_progress = TRUE;
0b4c1975
A
4571
4572 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4573
4574 iq = &vm_pageout_queue_internal;
4575 eq = &vm_pageout_queue_external;
4576
4577 vm_page_lock_queues();
4578
4579 while (qcount && !queue_empty(q)) {
4580
4581 if (current_run++ == 1000) {
4582 if (hibernate_should_abort()) {
4583 retval = 1;
4584 break;
4585 }
4586 current_run = 0;
4587 }
4588
4589 m = (vm_page_t) queue_first(q);
4590 m_object = m->object;
4591
4592 /*
4593 * check to see if we currently are working
4594 * with the same object... if so, we've
4595 * already got the lock
4596 */
4597 if (m_object != l_object) {
4598 /*
4599 * the object associated with candidate page is
4600 * different from the one we were just working
4601 * with... dump the lock if we still own it
4602 */
4603 if (l_object != NULL) {
4604 vm_object_unlock(l_object);
4605 l_object = NULL;
4606 }
4607 /*
4608 * Try to lock object; since we've already got the
4609 * page queues lock, we can only 'try' for this one.
4610 * if the 'try' fails, we need to do a mutex_pause
4611 * to allow the owner of the object lock a chance to
4612 * run...
4613 */
4614 if ( !vm_object_lock_try_scan(m_object)) {
4615
4616 if (try_failed_count > 20) {
4617 hibernate_stats.hibernate_queue_nolock++;
4618
4619 goto reenter_pg_on_q;
4620 }
4621 vm_pageout_scan_wants_object = m_object;
4622
4623 vm_page_unlock_queues();
4624 mutex_pause(try_failed_count++);
4625 vm_page_lock_queues();
4626
4627 hibernate_stats.hibernate_queue_paused++;
4628 continue;
4629 } else {
4630 l_object = m_object;
4631 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4632 }
4633 }
316670eb 4634 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
0b4c1975
A
4635 /*
4636 * page is not to be cleaned
4637 * put it back on the head of its queue
4638 */
4639 if (m->cleaning)
4640 hibernate_stats.hibernate_skipped_cleaning++;
4641 else
4642 hibernate_stats.hibernate_skipped_transient++;
4643
4644 goto reenter_pg_on_q;
4645 }
4646 if ( !m_object->pager_initialized && m_object->pager_created)
4647 goto reenter_pg_on_q;
4648
4649 if (m_object->copy == VM_OBJECT_NULL) {
4650 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4651 /*
4652 * let the normal hibernate image path
4653 * deal with these
4654 */
4655 goto reenter_pg_on_q;
4656 }
4657 }
4658 if ( !m->dirty && m->pmapped) {
4659 refmod_state = pmap_get_refmod(m->phys_page);
4660
316670eb
A
4661 if ((refmod_state & VM_MEM_MODIFIED)) {
4662 SET_PAGE_DIRTY(m, FALSE);
4663 }
0b4c1975
A
4664 } else
4665 refmod_state = 0;
4666
4667 if ( !m->dirty) {
4668 /*
4669 * page is not to be cleaned
4670 * put it back on the head of its queue
4671 */
4672 if (m->precious)
4673 hibernate_stats.hibernate_skipped_precious++;
4674
4675 goto reenter_pg_on_q;
4676 }
4677 tq = NULL;
4678
4679 if (m_object->internal) {
4680 if (VM_PAGE_Q_THROTTLED(iq))
4681 tq = iq;
4682 } else if (VM_PAGE_Q_THROTTLED(eq))
4683 tq = eq;
4684
4685 if (tq != NULL) {
4686 wait_result_t wait_result;
4687 int wait_count = 5;
4688
4689 if (l_object != NULL) {
4690 vm_object_unlock(l_object);
4691 l_object = NULL;
4692 }
4693 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4694
4695 tq->pgo_throttled = TRUE;
4696
4697 while (retval == 0) {
4698
4699 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4700
316670eb 4701 vm_page_unlock_queues();
0b4c1975 4702
316670eb 4703 wait_result = thread_block(THREAD_CONTINUE_NULL);
0b4c1975
A
4704
4705 vm_page_lock_queues();
4706
4707 if (hibernate_should_abort())
4708 retval = 1;
4709
4710 if (wait_result != THREAD_TIMED_OUT)
4711 break;
4712
4713 if (--wait_count == 0) {
316670eb
A
4714 hibernate_stats.hibernate_throttle_timeout++;
4715 retval = 1;
4716 }
0b4c1975
A
4717 }
4718 if (retval)
4719 break;
4720
4721 hibernate_stats.hibernate_throttled++;
4722
4723 continue;
4724 }
316670eb
A
4725 /*
4726 * we've already factored out pages in the laundry which
4727 * means this page can't be on the pageout queue so it's
4728 * safe to do the VM_PAGE_QUEUES_REMOVE
4729 */
4730 assert(!m->pageout_queue);
4731
0b4c1975
A
4732 VM_PAGE_QUEUES_REMOVE(m);
4733
316670eb 4734 vm_pageout_cluster(m, FALSE);
0b4c1975
A
4735
4736 hibernate_stats.hibernate_found_dirty++;
4737
4738 goto next_pg;
4739
4740reenter_pg_on_q:
4741 queue_remove(q, m, vm_page_t, pageq);
4742 queue_enter(q, m, vm_page_t, pageq);
4743
4744 hibernate_stats.hibernate_reentered_on_q++;
4745next_pg:
4746 hibernate_stats.hibernate_considered++;
4747
4748 qcount--;
4749 try_failed_count = 0;
4750 }
4751 if (l_object != NULL) {
4752 vm_object_unlock(l_object);
4753 l_object = NULL;
4754 }
316670eb 4755 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
0b4c1975
A
4756
4757 vm_page_unlock_queues();
4758
4759 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4760
316670eb
A
4761 hibernate_cleaning_in_progress = FALSE;
4762
0b4c1975
A
4763 return (retval);
4764}
4765
4766
4767static int
4768hibernate_flush_dirty_pages()
4769{
4770 struct vm_speculative_age_q *aq;
4771 uint32_t i;
4772
4773 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4774
4775 if (vm_page_local_q) {
4776 for (i = 0; i < vm_page_local_q_count; i++)
4777 vm_page_reactivate_local(i, TRUE, FALSE);
4778 }
4779
4780 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4781 int qcount;
4782 vm_page_t m;
4783
4784 aq = &vm_page_queue_speculative[i];
4785
4786 if (queue_empty(&aq->age_q))
4787 continue;
4788 qcount = 0;
4789
4790 vm_page_lockspin_queues();
4791
4792 queue_iterate(&aq->age_q,
4793 m,
4794 vm_page_t,
4795 pageq)
4796 {
4797 qcount++;
4798 }
4799 vm_page_unlock_queues();
4800
4801 if (qcount) {
4802 if (hibernate_flush_queue(&aq->age_q, qcount))
4803 return (1);
4804 }
4805 }
4806 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4807 return (1);
316670eb 4808 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
0b4c1975 4809 return (1);
316670eb
A
4810 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
4811 return (1);
4812 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
0b4c1975
A
4813 return (1);
4814
4815 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4816 return (1);
4817 return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4818}
4819
4820
4821extern void IOSleep(unsigned int);
4822extern int sync_internal(void);
4823
4824int
4825hibernate_flush_memory()
4826{
4827 int retval;
4828
4829 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4830
4831 IOSleep(2 * 1000);
4832
4833 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4834
4835 if ((retval = hibernate_flush_dirty_pages()) == 0) {
4836 if (consider_buffer_cache_collect != NULL) {
4837
4838 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4839
4840 sync_internal();
4841 (void)(*consider_buffer_cache_collect)(1);
7ddcb079 4842 consider_zone_gc(TRUE);
0b4c1975
A
4843
4844 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4845 }
4846 }
4847 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4848
4849 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4850 hibernate_stats.hibernate_considered,
4851 hibernate_stats.hibernate_reentered_on_q,
4852 hibernate_stats.hibernate_found_dirty);
4853 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4854 hibernate_stats.hibernate_skipped_cleaning,
4855 hibernate_stats.hibernate_skipped_transient,
4856 hibernate_stats.hibernate_skipped_precious,
4857 hibernate_stats.hibernate_queue_nolock);
4858 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4859 hibernate_stats.hibernate_queue_paused,
4860 hibernate_stats.hibernate_throttled,
4861 hibernate_stats.hibernate_throttle_timeout,
4862 hibernate_stats.hibernate_drained,
4863 hibernate_stats.hibernate_drain_timeout);
4864
4865 return (retval);
4866}
4867
6d2010ae 4868
b0d623f7
A
4869static void
4870hibernate_page_list_zero(hibernate_page_list_t *list)
4871{
4872 uint32_t bank;
4873 hibernate_bitmap_t * bitmap;
4874
4875 bitmap = &list->bank_bitmap[0];
4876 for (bank = 0; bank < list->bank_count; bank++)
4877 {
4878 uint32_t last_bit;
4879
4880 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4881 // set out-of-bound bits at end of bitmap.
4882 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4883 if (last_bit)
4884 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4885
4886 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4887 }
4888}
4889
4890void
4891hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4892{
4893 uint32_t i;
4894 vm_page_t m;
4895 uint64_t start, end, timeout, nsec;
4896 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4897 clock_get_uptime(&start);
4898
4899 for (i = 0; i < gobble_count; i++)
4900 {
4901 while (VM_PAGE_NULL == (m = vm_page_grab()))
4902 {
4903 clock_get_uptime(&end);
4904 if (end >= timeout)
4905 break;
4906 VM_PAGE_WAIT();
4907 }
4908 if (!m)
4909 break;
4910 m->busy = FALSE;
4911 vm_page_gobble(m);
4912
4913 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4914 hibernate_gobble_queue = m;
4915 }
4916
4917 clock_get_uptime(&end);
4918 absolutetime_to_nanoseconds(end - start, &nsec);
4919 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4920}
4921
4922void
4923hibernate_free_gobble_pages(void)
4924{
4925 vm_page_t m, next;
4926 uint32_t count = 0;
4927
4928 m = (vm_page_t) hibernate_gobble_queue;
4929 while(m)
4930 {
4931 next = (vm_page_t) m->pageq.next;
4932 vm_page_free(m);
4933 count++;
4934 m = next;
4935 }
4936 hibernate_gobble_queue = VM_PAGE_NULL;
4937
4938 if (count)
4939 HIBLOG("Freed %d pages\n", count);
4940}
4941
4942static boolean_t
4943hibernate_consider_discard(vm_page_t m)
4944{
4945 vm_object_t object = NULL;
4946 int refmod_state;
4947 boolean_t discard = FALSE;
4948
4949 do
4950 {
0b4c1975 4951 if (m->private)
b0d623f7
A
4952 panic("hibernate_consider_discard: private");
4953
0b4c1975
A
4954 if (!vm_object_lock_try(m->object)) {
4955 hibernate_stats.cd_lock_failed++;
b0d623f7 4956 break;
0b4c1975 4957 }
b0d623f7
A
4958 object = m->object;
4959
0b4c1975
A
4960 if (VM_PAGE_WIRED(m)) {
4961 hibernate_stats.cd_found_wired++;
b0d623f7 4962 break;
0b4c1975
A
4963 }
4964 if (m->precious) {
4965 hibernate_stats.cd_found_precious++;
b0d623f7 4966 break;
0b4c1975
A
4967 }
4968 if (m->busy || !object->alive) {
b0d623f7
A
4969 /*
4970 * Somebody is playing with this page.
4971 */
6d2010ae
A
4972 hibernate_stats.cd_found_busy++;
4973 break;
0b4c1975
A
4974 }
4975 if (m->absent || m->unusual || m->error) {
b0d623f7
A
4976 /*
4977 * If it's unusual in any way, ignore it
4978 */
0b4c1975 4979 hibernate_stats.cd_found_unusual++;
b0d623f7 4980 break;
0b4c1975
A
4981 }
4982 if (m->cleaning) {
4983 hibernate_stats.cd_found_cleaning++;
b0d623f7 4984 break;
0b4c1975 4985 }
316670eb 4986 if (m->laundry) {
0b4c1975 4987 hibernate_stats.cd_found_laundry++;
b0d623f7 4988 break;
0b4c1975 4989 }
b0d623f7
A
4990 if (!m->dirty)
4991 {
4992 refmod_state = pmap_get_refmod(m->phys_page);
4993
4994 if (refmod_state & VM_MEM_REFERENCED)
4995 m->reference = TRUE;
316670eb
A
4996 if (refmod_state & VM_MEM_MODIFIED) {
4997 SET_PAGE_DIRTY(m, FALSE);
4998 }
b0d623f7
A
4999 }
5000
5001 /*
5002 * If it's clean or purgeable we can discard the page on wakeup.
5003 */
5004 discard = (!m->dirty)
5005 || (VM_PURGABLE_VOLATILE == object->purgable)
0b4c1975
A
5006 || (VM_PURGABLE_EMPTY == object->purgable);
5007
5008 if (discard == FALSE)
5009 hibernate_stats.cd_found_dirty++;
b0d623f7
A
5010 }
5011 while (FALSE);
5012
5013 if (object)
5014 vm_object_unlock(object);
5015
5016 return (discard);
5017}
5018
5019
5020static void
5021hibernate_discard_page(vm_page_t m)
5022{
5023 if (m->absent || m->unusual || m->error)
5024 /*
5025 * If it's unusual in any way, ignore
5026 */
5027 return;
5028
316670eb
A
5029#if DEBUG
5030 vm_object_t object = m->object;
5031 if (!vm_object_lock_try(m->object))
5032 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5033#else
5034 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5035 makes sure these locks are uncontended before sleep */
5036#endif /* !DEBUG */
5037
b0d623f7
A
5038 if (m->pmapped == TRUE)
5039 {
5040 __unused int refmod_state = pmap_disconnect(m->phys_page);
5041 }
5042
5043 if (m->laundry)
5044 panic("hibernate_discard_page(%p) laundry", m);
5045 if (m->private)
5046 panic("hibernate_discard_page(%p) private", m);
5047 if (m->fictitious)
5048 panic("hibernate_discard_page(%p) fictitious", m);
5049
5050 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5051 {
5052 /* object should be on a queue */
5053 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5054 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5055 assert(old_queue);
b0d623f7
A
5056 vm_purgeable_token_delete_first(old_queue);
5057 m->object->purgable = VM_PURGABLE_EMPTY;
5058 }
5059
5060 vm_page_free(m);
316670eb
A
5061
5062#if DEBUG
5063 vm_object_unlock(object);
5064#endif /* DEBUG */
b0d623f7
A
5065}
5066
5067/*
5068 Bits zero in the bitmaps => page needs to be saved. All pages are saved by default;
5069 pages known to VM not to need saving are subtracted.
5070 Wired pages to be saved are present in page_list_wired, pageable in page_list.
5071*/
5072
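/*
 * Editor's illustrative sketch (not part of the original file): the bitmap
 * convention described above, in code form.  A page starts out as "must
 * save" (bit clear); hibernate_page_bitset() marks it as not needing to be
 * preserved, and hibernate_page_bittst() is what the discard path later
 * checks.  The helper name 'example_mark_discardable' is an assumption
 * made for the example.
 */
#if 0	/* illustration only -- never compiled */
static void
example_mark_discardable(hibernate_page_list_t *page_list, vm_page_t m)
{
	/* bit set => the hibernation image writer may skip this physical page */
	hibernate_page_bitset(page_list, TRUE, m->phys_page);

	/* later, hibernate_page_list_discard() frees only the marked pages */
	assert(hibernate_page_bittst(page_list, m->phys_page));
}
#endif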
5073void
5074hibernate_page_list_setall(hibernate_page_list_t * page_list,
5075 hibernate_page_list_t * page_list_wired,
6d2010ae 5076 hibernate_page_list_t * page_list_pal,
b0d623f7
A
5077 uint32_t * pagesOut)
5078{
5079 uint64_t start, end, nsec;
5080 vm_page_t m;
5081 uint32_t pages = page_list->page_count;
5082 uint32_t count_zf = 0, count_throttled = 0;
316670eb 5083 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
b0d623f7
A
5084 uint32_t count_wire = pages;
5085 uint32_t count_discard_active = 0;
5086 uint32_t count_discard_inactive = 0;
316670eb 5087 uint32_t count_discard_cleaned = 0;
b0d623f7
A
5088 uint32_t count_discard_purgeable = 0;
5089 uint32_t count_discard_speculative = 0;
5090 uint32_t i;
5091 uint32_t bank;
5092 hibernate_bitmap_t * bitmap;
5093 hibernate_bitmap_t * bitmap_wired;
5094
5095
0b4c1975
A
5096 HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
5097
316670eb
A
5098#if DEBUG
5099 vm_page_lock_queues();
5100 if (vm_page_local_q) {
5101 for (i = 0; i < vm_page_local_q_count; i++) {
5102 struct vpl *lq;
5103 lq = &vm_page_local_q[i].vpl_un.vpl;
5104 VPL_LOCK(&lq->vpl_lock);
5105 }
5106 }
5107#endif /* DEBUG */
5108
5109
0b4c1975 5110 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
b0d623f7
A
5111
5112 clock_get_uptime(&start);
5113
5114 hibernate_page_list_zero(page_list);
5115 hibernate_page_list_zero(page_list_wired);
6d2010ae 5116 hibernate_page_list_zero(page_list_pal);
b0d623f7 5117
0b4c1975
A
5118 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5119 hibernate_stats.cd_pages = pages;
5120
b0d623f7
A
5121 if (vm_page_local_q) {
5122 for (i = 0; i < vm_page_local_q_count; i++)
5123 vm_page_reactivate_local(i, TRUE, TRUE);
5124 }
5125
5126 m = (vm_page_t) hibernate_gobble_queue;
5127 while(m)
5128 {
5129 pages--;
5130 count_wire--;
5131 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5132 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5133 m = (vm_page_t) m->pageq.next;
5134 }
6d2010ae 5135
0b4c1975
A
5136 for( i = 0; i < real_ncpus; i++ )
5137 {
5138 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5139 {
5140 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5141 {
5142 pages--;
5143 count_wire--;
5144 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5145 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5146
5147 hibernate_stats.cd_local_free++;
5148 hibernate_stats.cd_total_free++;
5149 }
5150 }
5151 }
6d2010ae 5152
b0d623f7
A
5153 for( i = 0; i < vm_colors; i++ )
5154 {
5155 queue_iterate(&vm_page_queue_free[i],
5156 m,
5157 vm_page_t,
5158 pageq)
5159 {
5160 pages--;
5161 count_wire--;
5162 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5163 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
0b4c1975
A
5164
5165 hibernate_stats.cd_total_free++;
b0d623f7
A
5166 }
5167 }
5168
5169 queue_iterate(&vm_lopage_queue_free,
5170 m,
5171 vm_page_t,
5172 pageq)
5173 {
5174 pages--;
5175 count_wire--;
5176 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5177 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
0b4c1975
A
5178
5179 hibernate_stats.cd_total_free++;
b0d623f7
A
5180 }
5181
5182 queue_iterate( &vm_page_queue_throttled,
5183 m,
5184 vm_page_t,
5185 pageq )
5186 {
5187 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5188 && hibernate_consider_discard(m))
5189 {
5190 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5191 count_discard_inactive++;
5192 }
5193 else
5194 count_throttled++;
5195 count_wire--;
5196 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5197 }
5198
316670eb 5199 queue_iterate( &vm_page_queue_anonymous,
b0d623f7
A
5200 m,
5201 vm_page_t,
5202 pageq )
5203 {
5204 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5205 && hibernate_consider_discard(m))
5206 {
5207 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5208 if (m->dirty)
5209 count_discard_purgeable++;
5210 else
5211 count_discard_inactive++;
5212 }
5213 else
5214 count_zf++;
5215 count_wire--;
5216 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5217 }
5218
5219 queue_iterate( &vm_page_queue_inactive,
5220 m,
5221 vm_page_t,
5222 pageq )
5223 {
5224 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5225 && hibernate_consider_discard(m))
5226 {
5227 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5228 if (m->dirty)
5229 count_discard_purgeable++;
5230 else
5231 count_discard_inactive++;
5232 }
5233 else
5234 count_inactive++;
5235 count_wire--;
5236 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5237 }
5238
316670eb
A
5239 queue_iterate( &vm_page_queue_cleaned,
5240 m,
5241 vm_page_t,
5242 pageq )
5243 {
5244 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5245 && hibernate_consider_discard(m))
5246 {
5247 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5248 if (m->dirty)
5249 count_discard_purgeable++;
5250 else
5251 count_discard_cleaned++;
5252 }
5253 else
5254 count_cleaned++;
5255 count_wire--;
5256 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5257 }
5258
b0d623f7
A
5259 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5260 {
5261 queue_iterate(&vm_page_queue_speculative[i].age_q,
5262 m,
5263 vm_page_t,
5264 pageq)
5265 {
5266 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5267 && hibernate_consider_discard(m))
5268 {
5269 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5270 count_discard_speculative++;
5271 }
5272 else
5273 count_speculative++;
5274 count_wire--;
5275 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5276 }
5277 }
5278
5279 queue_iterate( &vm_page_queue_active,
5280 m,
5281 vm_page_t,
5282 pageq )
5283 {
5284 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5285 && hibernate_consider_discard(m))
5286 {
5287 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5288 if (m->dirty)
5289 count_discard_purgeable++;
5290 else
5291 count_discard_active++;
5292 }
5293 else
5294 count_active++;
5295 count_wire--;
5296 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5297 }
5298
5299 // pull wired from hibernate_bitmap
5300
5301 bitmap = &page_list->bank_bitmap[0];
5302 bitmap_wired = &page_list_wired->bank_bitmap[0];
5303 for (bank = 0; bank < page_list->bank_count; bank++)
5304 {
5305 for (i = 0; i < bitmap->bitmapwords; i++)
5306 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5307 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5308 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5309 }
5310
5311 // machine dependent adjustments
5312 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
5313
0b4c1975 5314 hibernate_stats.cd_count_wire = count_wire;
316670eb 5315 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative + count_discard_cleaned;
0b4c1975 5316
b0d623f7
A
5317 clock_get_uptime(&end);
5318 absolutetime_to_nanoseconds(end - start, &nsec);
5319 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5320
316670eb
A
5321 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d cleaned %d\n",
5322 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_zf, count_throttled,
5323 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
b0d623f7 5324
316670eb
A
5325 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
5326
5327#if DEBUG
5328 if (vm_page_local_q) {
5329 for (i = 0; i < vm_page_local_q_count; i++) {
5330 struct vpl *lq;
5331 lq = &vm_page_local_q[i].vpl_un.vpl;
5332 VPL_UNLOCK(&lq->vpl_lock);
5333 }
5334 }
5335 vm_page_unlock_queues();
5336#endif /* DEBUG */
0b4c1975
A
5337
5338 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
b0d623f7
A
5339}
5340
5341void
5342hibernate_page_list_discard(hibernate_page_list_t * page_list)
5343{
5344 uint64_t start, end, nsec;
5345 vm_page_t m;
5346 vm_page_t next;
5347 uint32_t i;
5348 uint32_t count_discard_active = 0;
5349 uint32_t count_discard_inactive = 0;
5350 uint32_t count_discard_purgeable = 0;
316670eb 5351 uint32_t count_discard_cleaned = 0;
b0d623f7
A
5352 uint32_t count_discard_speculative = 0;
5353
316670eb
A
5354#if DEBUG
5355 vm_page_lock_queues();
5356 if (vm_page_local_q) {
5357 for (i = 0; i < vm_page_local_q_count; i++) {
5358 struct vpl *lq;
5359 lq = &vm_page_local_q[i].vpl_un.vpl;
5360 VPL_LOCK(&lq->vpl_lock);
5361 }
5362 }
5363#endif /* DEBUG */
5364
b0d623f7
A
5365 clock_get_uptime(&start);
5366
316670eb
A
5367 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5368 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
b0d623f7
A
5369 {
5370 next = (vm_page_t) m->pageq.next;
5371 if (hibernate_page_bittst(page_list, m->phys_page))
5372 {
5373 if (m->dirty)
5374 count_discard_purgeable++;
5375 else
5376 count_discard_inactive++;
5377 hibernate_discard_page(m);
5378 }
5379 m = next;
5380 }
5381
5382 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5383 {
5384 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5385 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5386 {
5387 next = (vm_page_t) m->pageq.next;
5388 if (hibernate_page_bittst(page_list, m->phys_page))
5389 {
5390 count_discard_speculative++;
5391 hibernate_discard_page(m);
5392 }
5393 m = next;
5394 }
5395 }
5396
5397 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5398 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5399 {
5400 next = (vm_page_t) m->pageq.next;
5401 if (hibernate_page_bittst(page_list, m->phys_page))
5402 {
5403 if (m->dirty)
5404 count_discard_purgeable++;
5405 else
5406 count_discard_inactive++;
5407 hibernate_discard_page(m);
5408 }
5409 m = next;
5410 }
5411
5412 m = (vm_page_t) queue_first(&vm_page_queue_active);
5413 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5414 {
5415 next = (vm_page_t) m->pageq.next;
5416 if (hibernate_page_bittst(page_list, m->phys_page))
5417 {
5418 if (m->dirty)
5419 count_discard_purgeable++;
5420 else
5421 count_discard_active++;
5422 hibernate_discard_page(m);
5423 }
5424 m = next;
5425 }
5426
316670eb
A
5427 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5428 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5429 {
5430 next = (vm_page_t) m->pageq.next;
5431 if (hibernate_page_bittst(page_list, m->phys_page))
5432 {
5433 if (m->dirty)
5434 count_discard_purgeable++;
5435 else
5436 count_discard_cleaned++;
5437 hibernate_discard_page(m);
5438 }
5439 m = next;
5440 }
5441
5442#if DEBUG
5443 if (vm_page_local_q) {
5444 for (i = 0; i < vm_page_local_q_count; i++) {
5445 struct vpl *lq;
5446 lq = &vm_page_local_q[i].vpl_un.vpl;
5447 VPL_UNLOCK(&lq->vpl_lock);
5448 }
5449 }
5450 vm_page_unlock_queues();
5451#endif /* DEBUG */
5452
b0d623f7
A
5453 clock_get_uptime(&end);
5454 absolutetime_to_nanoseconds(end - start, &nsec);
316670eb 5455 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
b0d623f7 5456 nsec / 1000000ULL,
316670eb 5457 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
b0d623f7
A
5458}
5459
d1ecb069
A
5460#endif /* HIBERNATION */
5461
b0d623f7 5462/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
1c79356b
A
5463
5464#include <mach_vm_debug.h>
5465#if MACH_VM_DEBUG
5466
5467#include <mach_debug/hash_info.h>
5468#include <vm/vm_debug.h>
5469
5470/*
5471 * Routine: vm_page_info
5472 * Purpose:
5473 * Return information about the global VP table.
5474 * Fills the buffer with as much information as possible
5475 * and returns the desired size of the buffer.
5476 * Conditions:
5477 * Nothing locked. The caller should provide
5478 * possibly-pageable memory.
5479 */
5480
5481unsigned int
5482vm_page_info(
5483 hash_info_bucket_t *info,
5484 unsigned int count)
5485{
91447636 5486 unsigned int i;
b0d623f7 5487 lck_spin_t *bucket_lock;
1c79356b
A
5488
5489 if (vm_page_bucket_count < count)
5490 count = vm_page_bucket_count;
5491
5492 for (i = 0; i < count; i++) {
5493 vm_page_bucket_t *bucket = &vm_page_buckets[i];
5494 unsigned int bucket_count = 0;
5495 vm_page_t m;
5496
b0d623f7
A
5497 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5498 lck_spin_lock(bucket_lock);
5499
1c79356b
A
5500 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5501 bucket_count++;
b0d623f7
A
5502
5503 lck_spin_unlock(bucket_lock);
1c79356b
A
5504
5505 /* don't touch pageable memory while holding locks */
5506 info[i].hib_count = bucket_count;
5507 }
5508
5509 return vm_page_bucket_count;
5510}
5511#endif /* MACH_VM_DEBUG */