]> git.saurik.com Git - apple/xnu.git/blame - osfmk/vm/vm_resident.c
xnu-1504.7.4.tar.gz
[apple/xnu.git] / osfmk / vm / vm_resident.c
CommitLineData
1c79356b 1/*
b0d623f7 2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
91447636 65#include <debug.h>
2d21ac55 66#include <libkern/OSAtomic.h>
91447636 67
9bccf70c 68#include <mach/clock_types.h>
1c79356b
A
69#include <mach/vm_prot.h>
70#include <mach/vm_statistics.h>
2d21ac55 71#include <mach/sdt.h>
1c79356b
A
72#include <kern/counters.h>
73#include <kern/sched_prim.h>
74#include <kern/task.h>
75#include <kern/thread.h>
b0d623f7 76#include <kern/kalloc.h>
1c79356b
A
77#include <kern/zalloc.h>
78#include <kern/xpr.h>
79#include <vm/pmap.h>
80#include <vm/vm_init.h>
81#include <vm/vm_map.h>
82#include <vm/vm_page.h>
83#include <vm/vm_pageout.h>
84#include <vm/vm_kern.h> /* kernel_memory_allocate() */
85#include <kern/misc_protos.h>
86#include <zone_debug.h>
87#include <vm/cpm.h>
55e303ae
A
88#include <ppc/mappings.h> /* (BRINGUP) */
89#include <pexpert/pexpert.h> /* (BRINGUP) */
90
91447636 91#include <vm/vm_protos.h>
2d21ac55
A
92#include <vm/memory_object.h>
93#include <vm/vm_purgeable_internal.h>
94
b0d623f7
A
95#include <IOKit/IOHibernatePrivate.h>
96
97
2d21ac55
A
98#if CONFIG_EMBEDDED
99#include <sys/kern_memorystatus.h>
100#endif
101
b0d623f7
A
102#include <sys/kdebug.h>
103
104boolean_t vm_page_free_verify = TRUE;
105
106int speculative_age_index = 0;
107int speculative_steal_index = 0;
108lck_mtx_ext_t vm_page_queue_lock_ext;
109lck_mtx_ext_t vm_page_queue_free_lock_ext;
110lck_mtx_ext_t vm_purgeable_queue_lock_ext;
2d21ac55
A
111
112struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
113
0b4e3aa0 114
b0d623f7
A
115__private_extern__ void vm_page_init_lck_grp(void);
116
117static void vm_page_free_prepare(vm_page_t page);
118
119
120
1c79356b
A
121/*
122 * Associated with page of user-allocatable memory is a
123 * page structure.
124 */
125
126/*
127 * These variables record the values returned by vm_page_bootstrap,
128 * for debugging purposes. The implementation of pmap_steal_memory
129 * and pmap_startup here also uses them internally.
130 */
131
132vm_offset_t virtual_space_start;
133vm_offset_t virtual_space_end;
134int vm_page_pages;
135
136/*
137 * The vm_page_lookup() routine, which provides for fast
138 * (virtual memory object, offset) to page lookup, employs
139 * the following hash table. The vm_page_{insert,remove}
140 * routines install and remove associations in the table.
141 * [This table is often called the virtual-to-physical,
142 * or VP, table.]
143 */
144typedef struct {
145 vm_page_t pages;
146#if MACH_PAGE_HASH_STATS
147 int cur_count; /* current count */
148 int hi_count; /* high water mark */
149#endif /* MACH_PAGE_HASH_STATS */
150} vm_page_bucket_t;
151
b0d623f7
A
152
153#define BUCKETS_PER_LOCK 16
154
1c79356b
A
155vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
156unsigned int vm_page_bucket_count = 0; /* How big is array? */
157unsigned int vm_page_hash_mask; /* Mask for hash function */
158unsigned int vm_page_hash_shift; /* Shift for hash function */
2d21ac55 159uint32_t vm_page_bucket_hash; /* Basic bucket hash */
b0d623f7
A
160unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
161
162lck_spin_t *vm_page_bucket_locks;
1c79356b 163
91447636 164
1c79356b
A
165#if MACH_PAGE_HASH_STATS
166/* This routine is only for debug. It is intended to be called by
167 * hand by a developer using a kernel debugger. This routine prints
168 * out vm_page_hash table statistics to the kernel debug console.
169 */
170void
171hash_debug(void)
172{
173 int i;
174 int numbuckets = 0;
175 int highsum = 0;
176 int maxdepth = 0;
177
178 for (i = 0; i < vm_page_bucket_count; i++) {
179 if (vm_page_buckets[i].hi_count) {
180 numbuckets++;
181 highsum += vm_page_buckets[i].hi_count;
182 if (vm_page_buckets[i].hi_count > maxdepth)
183 maxdepth = vm_page_buckets[i].hi_count;
184 }
185 }
186 printf("Total number of buckets: %d\n", vm_page_bucket_count);
187 printf("Number used buckets: %d = %d%%\n",
188 numbuckets, 100*numbuckets/vm_page_bucket_count);
189 printf("Number unused buckets: %d = %d%%\n",
190 vm_page_bucket_count - numbuckets,
191 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
192 printf("Sum of bucket max depth: %d\n", highsum);
193 printf("Average bucket depth: %d.%2d\n",
194 highsum/vm_page_bucket_count,
195 highsum%vm_page_bucket_count);
196 printf("Maximum bucket depth: %d\n", maxdepth);
197}
198#endif /* MACH_PAGE_HASH_STATS */
199
200/*
201 * The virtual page size is currently implemented as a runtime
202 * variable, but is constant once initialized using vm_set_page_size.
203 * This initialization must be done in the machine-dependent
204 * bootstrap sequence, before calling other machine-independent
205 * initializations.
206 *
207 * All references to the virtual page size outside this
208 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
209 * constants.
210 */
55e303ae
A
211vm_size_t page_size = PAGE_SIZE;
212vm_size_t page_mask = PAGE_MASK;
2d21ac55 213int page_shift = PAGE_SHIFT;
1c79356b
A
214
215/*
216 * Resident page structures are initialized from
217 * a template (see vm_page_alloc).
218 *
219 * When adding a new field to the virtual memory
220 * object structure, be sure to add initialization
221 * (see vm_page_bootstrap).
222 */
223struct vm_page vm_page_template;
224
2d21ac55
A
225vm_page_t vm_pages = VM_PAGE_NULL;
226unsigned int vm_pages_count = 0;
227
1c79356b
A
228/*
229 * Resident pages that represent real memory
2d21ac55
A
230 * are allocated from a set of free lists,
231 * one per color.
1c79356b 232 */
2d21ac55
A
233unsigned int vm_colors;
234unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
235unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
236queue_head_t vm_page_queue_free[MAX_COLORS];
1c79356b 237vm_page_t vm_page_queue_fictitious;
1c79356b 238unsigned int vm_page_free_wanted;
2d21ac55 239unsigned int vm_page_free_wanted_privileged;
91447636
A
240unsigned int vm_page_free_count;
241unsigned int vm_page_fictitious_count;
1c79356b
A
242
243unsigned int vm_page_free_count_minimum; /* debugging */
244
245/*
246 * Occasionally, the virtual memory system uses
247 * resident page structures that do not refer to
248 * real pages, for example to leave a page with
249 * important state information in the VP table.
250 *
251 * These page structures are allocated the way
252 * most other kernel structures are.
253 */
254zone_t vm_page_zone;
b0d623f7
A
255vm_locks_array_t vm_page_locks;
256decl_lck_mtx_data(,vm_page_alloc_lock)
9bccf70c 257unsigned int io_throttle_zero_fill;
1c79356b 258
b0d623f7
A
259unsigned int vm_page_local_q_count = 0;
260unsigned int vm_page_local_q_soft_limit = 250;
261unsigned int vm_page_local_q_hard_limit = 500;
262struct vplq *vm_page_local_q = NULL;
263
1c79356b
A
264/*
265 * Fictitious pages don't have a physical address,
55e303ae 266 * but we must initialize phys_page to something.
1c79356b
A
267 * For debugging, this should be a strange value
268 * that the pmap module can recognize in assertions.
269 */
b0d623f7 270ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
1c79356b 271
2d21ac55
A
272/*
273 * Guard pages are not accessible so they don't
274 * need a physical address, but we need to enter
275 * one in the pmap.
276 * Let's make it recognizable and make sure that
277 * we don't use a real physical page with that
278 * physical address.
279 */
b0d623f7 280ppnum_t vm_page_guard_addr = (ppnum_t) -2;
2d21ac55 281
1c79356b
A
282/*
283 * Resident page structures are also chained on
284 * queues that are used by the page replacement
285 * system (pageout daemon). These queues are
286 * defined here, but are shared by the pageout
9bccf70c
A
287 * module. The inactive queue is broken into
288 * inactive and zf for convenience as the
289 * pageout daemon often assignes a higher
290 * affinity to zf pages
1c79356b
A
291 */
292queue_head_t vm_page_queue_active;
293queue_head_t vm_page_queue_inactive;
2d21ac55 294queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
b0d623f7 295queue_head_t vm_page_queue_throttled;
2d21ac55 296
91447636
A
297unsigned int vm_page_active_count;
298unsigned int vm_page_inactive_count;
2d21ac55
A
299unsigned int vm_page_throttled_count;
300unsigned int vm_page_speculative_count;
91447636
A
301unsigned int vm_page_wire_count;
302unsigned int vm_page_gobble_count = 0;
303unsigned int vm_page_wire_count_warning = 0;
304unsigned int vm_page_gobble_count_warning = 0;
305
306unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
b0d623f7 307unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
91447636 308uint64_t vm_page_purged_count = 0; /* total count of purged pages */
1c79356b 309
b0d623f7 310#if DEVELOPMENT || DEBUG
2d21ac55
A
311unsigned int vm_page_speculative_recreated = 0;
312unsigned int vm_page_speculative_created = 0;
313unsigned int vm_page_speculative_used = 0;
b0d623f7 314#endif
2d21ac55 315
0c530ab8
A
316ppnum_t vm_lopage_poolstart = 0;
317ppnum_t vm_lopage_poolend = 0;
318int vm_lopage_poolsize = 0;
319uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
320
321
1c79356b
A
322/*
323 * Several page replacement parameters are also
324 * shared with this module, so that page allocation
325 * (done here in vm_page_alloc) can trigger the
326 * pageout daemon.
327 */
91447636
A
328unsigned int vm_page_free_target = 0;
329unsigned int vm_page_free_min = 0;
b0d623f7
A
330unsigned int vm_page_throttle_limit = 0;
331uint32_t vm_page_creation_throttle = 0;
91447636 332unsigned int vm_page_inactive_target = 0;
2d21ac55 333unsigned int vm_page_inactive_min = 0;
91447636 334unsigned int vm_page_free_reserved = 0;
b0d623f7 335unsigned int vm_page_throttle_count = 0;
1c79356b
A
336
337/*
338 * The VM system has a couple of heuristics for deciding
339 * that pages are "uninteresting" and should be placed
340 * on the inactive queue as likely candidates for replacement.
341 * These variables let the heuristics be controlled at run-time
342 * to make experimentation easier.
343 */
344
345boolean_t vm_page_deactivate_hint = TRUE;
346
b0d623f7
A
347struct vm_page_stats_reusable vm_page_stats_reusable;
348
1c79356b
A
349/*
350 * vm_set_page_size:
351 *
352 * Sets the page size, perhaps based upon the memory
353 * size. Must be called before any use of page-size
354 * dependent functions.
355 *
356 * Sets page_shift and page_mask from page_size.
357 */
358void
359vm_set_page_size(void)
360{
1c79356b
A
361 page_mask = page_size - 1;
362
363 if ((page_mask & page_size) != 0)
364 panic("vm_set_page_size: page size not a power of two");
365
366 for (page_shift = 0; ; page_shift++)
91447636 367 if ((1U << page_shift) == page_size)
1c79356b 368 break;
1c79356b
A
369}
370
2d21ac55
A
371
372/* Called once during statup, once the cache geometry is known.
373 */
374static void
375vm_page_set_colors( void )
376{
377 unsigned int n, override;
378
593a1d5f 379 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
2d21ac55
A
380 n = override;
381 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
382 n = vm_cache_geometry_colors;
383 else n = DEFAULT_COLORS; /* use default if all else fails */
384
385 if ( n == 0 )
386 n = 1;
387 if ( n > MAX_COLORS )
388 n = MAX_COLORS;
389
390 /* the count must be a power of 2 */
b0d623f7 391 if ( ( n & (n - 1)) != 0 )
2d21ac55
A
392 panic("vm_page_set_colors");
393
394 vm_colors = n;
395 vm_color_mask = n - 1;
396}
397
398
b0d623f7
A
399lck_grp_t vm_page_lck_grp_free;
400lck_grp_t vm_page_lck_grp_queue;
401lck_grp_t vm_page_lck_grp_local;
402lck_grp_t vm_page_lck_grp_purge;
403lck_grp_t vm_page_lck_grp_alloc;
404lck_grp_t vm_page_lck_grp_bucket;
405lck_grp_attr_t vm_page_lck_grp_attr;
406lck_attr_t vm_page_lck_attr;
407
408
409__private_extern__ void
410vm_page_init_lck_grp(void)
411{
412 /*
413 * initialze the vm_page lock world
414 */
415 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
416 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
417 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
418 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
419 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
420 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
421 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
422 lck_attr_setdefault(&vm_page_lck_attr);
423}
424
425void
426vm_page_init_local_q()
427{
428 unsigned int num_cpus;
429 unsigned int i;
430 struct vplq *t_local_q;
431
432 num_cpus = ml_get_max_cpus();
433
434 /*
435 * no point in this for a uni-processor system
436 */
437 if (num_cpus >= 2) {
438 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
439
440 for (i = 0; i < num_cpus; i++) {
441 struct vpl *lq;
442
443 lq = &t_local_q[i].vpl_un.vpl;
444 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
445 queue_init(&lq->vpl_queue);
446 lq->vpl_count = 0;
447 }
448 vm_page_local_q_count = num_cpus;
449
450 vm_page_local_q = (struct vplq *)t_local_q;
451 }
452}
453
454
1c79356b
A
455/*
456 * vm_page_bootstrap:
457 *
458 * Initializes the resident memory module.
459 *
460 * Allocates memory for the page cells, and
461 * for the object/offset-to-page hash table headers.
462 * Each page cell is initialized and placed on the free list.
463 * Returns the range of available kernel virtual memory.
464 */
465
466void
467vm_page_bootstrap(
468 vm_offset_t *startp,
469 vm_offset_t *endp)
470{
471 register vm_page_t m;
91447636 472 unsigned int i;
1c79356b
A
473 unsigned int log1;
474 unsigned int log2;
475 unsigned int size;
476
477 /*
478 * Initialize the vm_page template.
479 */
480
481 m = &vm_page_template;
b0d623f7 482 bzero(m, sizeof (*m));
1c79356b 483
91447636
A
484 m->pageq.next = NULL;
485 m->pageq.prev = NULL;
486 m->listq.next = NULL;
487 m->listq.prev = NULL;
b0d623f7 488 m->next = VM_PAGE_NULL;
91447636 489
b0d623f7
A
490 m->object = VM_OBJECT_NULL; /* reset later */
491 m->offset = (vm_object_offset_t) -1; /* reset later */
492
493 m->wire_count = 0;
494 m->local = FALSE;
1c79356b
A
495 m->inactive = FALSE;
496 m->active = FALSE;
b0d623f7
A
497 m->pageout_queue = FALSE;
498 m->speculative = FALSE;
1c79356b
A
499 m->laundry = FALSE;
500 m->free = FALSE;
501 m->reference = FALSE;
b0d623f7
A
502 m->gobbled = FALSE;
503 m->private = FALSE;
504 m->throttled = FALSE;
505 m->__unused_pageq_bits = 0;
506
507 m->phys_page = 0; /* reset later */
1c79356b
A
508
509 m->busy = TRUE;
510 m->wanted = FALSE;
511 m->tabled = FALSE;
512 m->fictitious = FALSE;
b0d623f7
A
513 m->pmapped = FALSE;
514 m->wpmapped = FALSE;
515 m->pageout = FALSE;
1c79356b
A
516 m->absent = FALSE;
517 m->error = FALSE;
518 m->dirty = FALSE;
519 m->cleaning = FALSE;
520 m->precious = FALSE;
521 m->clustered = FALSE;
b0d623f7 522 m->overwriting = FALSE;
1c79356b 523 m->restart = FALSE;
b0d623f7 524 m->unusual = FALSE;
91447636 525 m->encrypted = FALSE;
2d21ac55 526 m->encrypted_cleaning = FALSE;
b0d623f7
A
527 m->list_req_pending = FALSE;
528 m->dump_cleaning = FALSE;
529 m->cs_validated = FALSE;
530 m->cs_tainted = FALSE;
531 m->no_cache = FALSE;
532 m->zero_fill = FALSE;
533 m->reusable = FALSE;
534 m->__unused_object_bits = 0;
1c79356b 535
1c79356b 536
1c79356b
A
537 /*
538 * Initialize the page queues.
539 */
b0d623f7
A
540 vm_page_init_lck_grp();
541
542 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
543 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
544 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
2d21ac55
A
545
546 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
547 int group;
548
549 purgeable_queues[i].token_q_head = 0;
550 purgeable_queues[i].token_q_tail = 0;
551 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
552 queue_init(&purgeable_queues[i].objq[group]);
553
554 purgeable_queues[i].type = i;
555 purgeable_queues[i].new_pages = 0;
556#if MACH_ASSERT
557 purgeable_queues[i].debug_count_tokens = 0;
558 purgeable_queues[i].debug_count_objects = 0;
559#endif
560 };
561
562 for (i = 0; i < MAX_COLORS; i++ )
563 queue_init(&vm_page_queue_free[i]);
564 queue_init(&vm_lopage_queue_free);
1c79356b
A
565 vm_page_queue_fictitious = VM_PAGE_NULL;
566 queue_init(&vm_page_queue_active);
567 queue_init(&vm_page_queue_inactive);
2d21ac55 568 queue_init(&vm_page_queue_throttled);
9bccf70c 569 queue_init(&vm_page_queue_zf);
1c79356b 570
2d21ac55
A
571 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
572 queue_init(&vm_page_queue_speculative[i].age_q);
573
574 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
575 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
576 }
1c79356b 577 vm_page_free_wanted = 0;
2d21ac55
A
578 vm_page_free_wanted_privileged = 0;
579
580 vm_page_set_colors();
581
1c79356b
A
582
583 /*
584 * Steal memory for the map and zone subsystems.
585 */
586
587 vm_map_steal_memory();
588 zone_steal_memory();
589
590 /*
591 * Allocate (and initialize) the virtual-to-physical
592 * table hash buckets.
593 *
594 * The number of buckets should be a power of two to
595 * get a good hash function. The following computation
596 * chooses the first power of two that is greater
597 * than the number of physical pages in the system.
598 */
599
1c79356b
A
600 if (vm_page_bucket_count == 0) {
601 unsigned int npages = pmap_free_pages();
602
603 vm_page_bucket_count = 1;
604 while (vm_page_bucket_count < npages)
605 vm_page_bucket_count <<= 1;
606 }
b0d623f7 607 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
1c79356b
A
608
609 vm_page_hash_mask = vm_page_bucket_count - 1;
610
611 /*
612 * Calculate object shift value for hashing algorithm:
613 * O = log2(sizeof(struct vm_object))
614 * B = log2(vm_page_bucket_count)
615 * hash shifts the object left by
616 * B/2 - O
617 */
618 size = vm_page_bucket_count;
619 for (log1 = 0; size > 1; log1++)
620 size /= 2;
621 size = sizeof(struct vm_object);
622 for (log2 = 0; size > 1; log2++)
623 size /= 2;
624 vm_page_hash_shift = log1/2 - log2 + 1;
55e303ae
A
625
626 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
627 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
628 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to insure unique series */
1c79356b
A
629
630 if (vm_page_hash_mask & vm_page_bucket_count)
631 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
632
633 vm_page_buckets = (vm_page_bucket_t *)
634 pmap_steal_memory(vm_page_bucket_count *
635 sizeof(vm_page_bucket_t));
636
b0d623f7
A
637 vm_page_bucket_locks = (lck_spin_t *)
638 pmap_steal_memory(vm_page_bucket_lock_count *
639 sizeof(lck_spin_t));
640
1c79356b
A
641 for (i = 0; i < vm_page_bucket_count; i++) {
642 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
643
644 bucket->pages = VM_PAGE_NULL;
645#if MACH_PAGE_HASH_STATS
646 bucket->cur_count = 0;
647 bucket->hi_count = 0;
648#endif /* MACH_PAGE_HASH_STATS */
649 }
650
b0d623f7
A
651 for (i = 0; i < vm_page_bucket_lock_count; i++)
652 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
653
1c79356b
A
654 /*
655 * Machine-dependent code allocates the resident page table.
656 * It uses vm_page_init to initialize the page frames.
657 * The code also returns to us the virtual space available
658 * to the kernel. We don't trust the pmap module
659 * to get the alignment right.
660 */
661
662 pmap_startup(&virtual_space_start, &virtual_space_end);
91447636
A
663 virtual_space_start = round_page(virtual_space_start);
664 virtual_space_end = trunc_page(virtual_space_end);
1c79356b
A
665
666 *startp = virtual_space_start;
667 *endp = virtual_space_end;
668
669 /*
670 * Compute the initial "wire" count.
671 * Up until now, the pages which have been set aside are not under
672 * the VM system's control, so although they aren't explicitly
673 * wired, they nonetheless can't be moved. At this moment,
674 * all VM managed pages are "free", courtesy of pmap_startup.
675 */
b0d623f7
A
676 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
677 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count; /* initial value */
1c79356b 678 vm_page_free_count_minimum = vm_page_free_count;
91447636 679
2d21ac55
A
680 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
681 vm_page_free_count, vm_page_wire_count);
682
91447636 683 simple_lock_init(&vm_paging_lock, 0);
1c79356b
A
684}
685
686#ifndef MACHINE_PAGES
687/*
688 * We implement pmap_steal_memory and pmap_startup with the help
689 * of two simpler functions, pmap_virtual_space and pmap_next_page.
690 */
691
91447636 692void *
1c79356b
A
693pmap_steal_memory(
694 vm_size_t size)
695{
55e303ae
A
696 vm_offset_t addr, vaddr;
697 ppnum_t phys_page;
1c79356b
A
698
699 /*
700 * We round the size to a round multiple.
701 */
702
703 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
704
705 /*
706 * If this is the first call to pmap_steal_memory,
707 * we have to initialize ourself.
708 */
709
710 if (virtual_space_start == virtual_space_end) {
711 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
712
713 /*
714 * The initial values must be aligned properly, and
715 * we don't trust the pmap module to do it right.
716 */
717
91447636
A
718 virtual_space_start = round_page(virtual_space_start);
719 virtual_space_end = trunc_page(virtual_space_end);
1c79356b
A
720 }
721
722 /*
723 * Allocate virtual memory for this request.
724 */
725
726 addr = virtual_space_start;
727 virtual_space_start += size;
728
b0d623f7 729 kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
1c79356b
A
730
731 /*
732 * Allocate and map physical pages to back new virtual pages.
733 */
734
91447636 735 for (vaddr = round_page(addr);
1c79356b
A
736 vaddr < addr + size;
737 vaddr += PAGE_SIZE) {
b0d623f7
A
738#if defined(__LP64__)
739 if (!pmap_next_page_k64(&phys_page))
740#else
55e303ae 741 if (!pmap_next_page(&phys_page))
b0d623f7
A
742#endif
743
1c79356b
A
744 panic("pmap_steal_memory");
745
746 /*
747 * XXX Logically, these mappings should be wired,
748 * but some pmap modules barf if they are.
749 */
b0d623f7
A
750#if defined(__LP64__)
751 pmap_pre_expand(kernel_pmap, vaddr);
752#endif
1c79356b 753
55e303ae 754 pmap_enter(kernel_pmap, vaddr, phys_page,
9bccf70c
A
755 VM_PROT_READ|VM_PROT_WRITE,
756 VM_WIMG_USE_DEFAULT, FALSE);
1c79356b
A
757 /*
758 * Account for newly stolen memory
759 */
760 vm_page_wire_count++;
761
762 }
763
91447636 764 return (void *) addr;
1c79356b
A
765}
766
767void
768pmap_startup(
769 vm_offset_t *startp,
770 vm_offset_t *endp)
771{
55e303ae 772 unsigned int i, npages, pages_initialized, fill, fillval;
55e303ae
A
773 ppnum_t phys_page;
774 addr64_t tmpaddr;
0c530ab8
A
775 unsigned int num_of_lopages = 0;
776 unsigned int last_index;
1c79356b
A
777
778 /*
779 * We calculate how many page frames we will have
780 * and then allocate the page structures in one chunk.
781 */
782
55e303ae 783 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
b0d623f7 784 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
2d21ac55 785 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
1c79356b 786
2d21ac55 787 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
1c79356b
A
788
789 /*
790 * Initialize the page frames.
791 */
1c79356b 792 for (i = 0, pages_initialized = 0; i < npages; i++) {
55e303ae 793 if (!pmap_next_page(&phys_page))
1c79356b
A
794 break;
795
2d21ac55 796 vm_page_init(&vm_pages[i], phys_page);
1c79356b
A
797 vm_page_pages++;
798 pages_initialized++;
799 }
2d21ac55 800 vm_pages_count = pages_initialized;
1c79356b 801
0c530ab8
A
802 /*
803 * Check if we want to initialize pages to a known value
804 */
805 fill = 0; /* Assume no fill */
593a1d5f 806 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
2d21ac55 807
0c530ab8
A
808
809 /*
810 * if vm_lopage_poolsize is non-zero, than we need to reserve
811 * a pool of pages whose addresess are less than 4G... this pool
812 * is used by drivers whose hardware can't DMA beyond 32 bits...
813 *
814 * note that I'm assuming that the page list is ascending and
815 * ordered w/r to the physical address
816 */
817 for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) {
818 vm_page_t m;
819
2d21ac55 820 m = &vm_pages[i];
0c530ab8
A
821
822 if (m->phys_page >= (1 << (32 - PAGE_SHIFT)))
823 panic("couldn't reserve the lopage pool: not enough lo pages\n");
824
825 if (m->phys_page < vm_lopage_poolend)
826 panic("couldn't reserve the lopage pool: page list out of order\n");
827
828 vm_lopage_poolend = m->phys_page;
829
830 if (vm_lopage_poolstart == 0)
831 vm_lopage_poolstart = m->phys_page;
832 else {
833 if (m->phys_page < vm_lopage_poolstart)
834 panic("couldn't reserve the lopage pool: page list out of order\n");
835 }
836
837 if (fill)
838 fillPage(m->phys_page, fillval); /* Fill the page with a know value if requested at boot */
839
840 vm_page_release(m);
841 }
842 last_index = i;
843
844 // -debug code remove
845 if (2 == vm_himemory_mode) {
846 // free low -> high so high is preferred
847 for (i = last_index + 1; i <= pages_initialized; i++) {
2d21ac55
A
848 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */
849 vm_page_release(&vm_pages[i - 1]);
0c530ab8
A
850 }
851 }
852 else
853 // debug code remove-
854
1c79356b
A
855 /*
856 * Release pages in reverse order so that physical pages
857 * initially get allocated in ascending addresses. This keeps
858 * the devices (which must address physical memory) happy if
859 * they require several consecutive pages.
860 */
0c530ab8 861 for (i = pages_initialized; i > last_index; i--) {
2d21ac55
A
862 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a know value if requested at boot */
863 vm_page_release(&vm_pages[i - 1]);
1c79356b
A
864 }
865
55e303ae
A
866#if 0
867 {
868 vm_page_t xx, xxo, xxl;
2d21ac55 869 int i, j, k, l;
55e303ae
A
870
871 j = 0; /* (BRINGUP) */
872 xxl = 0;
873
2d21ac55
A
874 for( i = 0; i < vm_colors; i++ ) {
875 queue_iterate(&vm_page_queue_free[i],
876 xx,
877 vm_page_t,
878 pageq) { /* BRINGUP */
879 j++; /* (BRINGUP) */
880 if(j > vm_page_free_count) { /* (BRINGUP) */
881 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
55e303ae 882 }
2d21ac55
A
883
884 l = vm_page_free_count - j; /* (BRINGUP) */
885 k = 0; /* (BRINGUP) */
886
887 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
888
889 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
890 k++;
891 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
892 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
893 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
894 }
895 }
896
897 xxl = xx;
55e303ae
A
898 }
899 }
900
901 if(j != vm_page_free_count) { /* (BRINGUP) */
902 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
903 }
904 }
905#endif
906
907
1c79356b
A
908 /*
909 * We have to re-align virtual_space_start,
910 * because pmap_steal_memory has been using it.
911 */
912
b0d623f7 913 virtual_space_start = round_page(virtual_space_start);
1c79356b
A
914
915 *startp = virtual_space_start;
916 *endp = virtual_space_end;
917}
918#endif /* MACHINE_PAGES */
919
920/*
921 * Routine: vm_page_module_init
922 * Purpose:
923 * Second initialization pass, to be done after
924 * the basic VM system is ready.
925 */
926void
927vm_page_module_init(void)
928{
929 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
930 0, PAGE_SIZE, "vm pages");
931
932#if ZONE_DEBUG
933 zone_debug_disable(vm_page_zone);
934#endif /* ZONE_DEBUG */
935
936 zone_change(vm_page_zone, Z_EXPAND, FALSE);
937 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
938 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
939
940 /*
941 * Adjust zone statistics to account for the real pages allocated
942 * in vm_page_create(). [Q: is this really what we want?]
943 */
944 vm_page_zone->count += vm_page_pages;
945 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
946
b0d623f7 947 lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
1c79356b
A
948}
949
950/*
951 * Routine: vm_page_create
952 * Purpose:
953 * After the VM system is up, machine-dependent code
954 * may stumble across more physical memory. For example,
955 * memory that it was reserving for a frame buffer.
956 * vm_page_create turns this memory into available pages.
957 */
958
959void
960vm_page_create(
55e303ae
A
961 ppnum_t start,
962 ppnum_t end)
1c79356b 963{
55e303ae
A
964 ppnum_t phys_page;
965 vm_page_t m;
1c79356b 966
55e303ae
A
967 for (phys_page = start;
968 phys_page < end;
969 phys_page++) {
1c79356b
A
970 while ((m = (vm_page_t) vm_page_grab_fictitious())
971 == VM_PAGE_NULL)
972 vm_page_more_fictitious();
973
55e303ae 974 vm_page_init(m, phys_page);
1c79356b
A
975 vm_page_pages++;
976 vm_page_release(m);
977 }
978}
979
980/*
981 * vm_page_hash:
982 *
983 * Distributes the object/offset key pair among hash buckets.
984 *
55e303ae 985 * NOTE: The bucket count must be a power of 2
1c79356b
A
986 */
987#define vm_page_hash(object, offset) (\
b0d623f7 988 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1c79356b
A
989 & vm_page_hash_mask)
990
2d21ac55 991
1c79356b
A
992/*
993 * vm_page_insert: [ internal use only ]
994 *
995 * Inserts the given mem entry into the object/object-page
996 * table and object list.
997 *
998 * The object must be locked.
999 */
1c79356b
A
1000void
1001vm_page_insert(
2d21ac55
A
1002 vm_page_t mem,
1003 vm_object_t object,
1004 vm_object_offset_t offset)
1005{
b0d623f7 1006 vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
2d21ac55
A
1007}
1008
4a3eedf9 1009void
2d21ac55
A
1010vm_page_insert_internal(
1011 vm_page_t mem,
1012 vm_object_t object,
1013 vm_object_offset_t offset,
b0d623f7
A
1014 boolean_t queues_lock_held,
1015 boolean_t insert_in_hash)
1c79356b 1016{
b0d623f7
A
1017 vm_page_bucket_t *bucket;
1018 lck_spin_t *bucket_lock;
1019 int hash_id;
1c79356b
A
1020
1021 XPR(XPR_VM_PAGE,
1022 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7 1023 object, offset, mem, 0,0);
1c79356b
A
1024
1025 VM_PAGE_CHECK(mem);
1026
2d21ac55
A
1027 if (object == vm_submap_object) {
1028 /* the vm_submap_object is only a placeholder for submaps */
1029 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1030 }
1031
1032 vm_object_lock_assert_exclusive(object);
1033#if DEBUG
b0d623f7
A
1034 lck_mtx_assert(&vm_page_queue_lock,
1035 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1036 : LCK_MTX_ASSERT_NOTOWNED);
1037#endif /* DEBUG */
1038
1039 if (insert_in_hash == TRUE) {
1040#if DEBUG
1041 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1042 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1043 "already in (obj=%p,off=0x%llx)",
1044 mem, object, offset, mem->object, mem->offset);
91447636 1045#endif
b0d623f7 1046 assert(!object->internal || offset < object->size);
1c79356b 1047
b0d623f7
A
1048 /* only insert "pageout" pages into "pageout" objects,
1049 * and normal pages into normal objects */
1050 assert(object->pageout == mem->pageout);
91447636 1051
b0d623f7
A
1052 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1053
1054 /*
1055 * Record the object/offset pair in this page
1056 */
1c79356b 1057
b0d623f7
A
1058 mem->object = object;
1059 mem->offset = offset;
1c79356b 1060
b0d623f7
A
1061 /*
1062 * Insert it into the object_object/offset hash table
1063 */
1064 hash_id = vm_page_hash(object, offset);
1065 bucket = &vm_page_buckets[hash_id];
1066 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1067
1068 lck_spin_lock(bucket_lock);
1c79356b 1069
b0d623f7
A
1070 mem->next = bucket->pages;
1071 bucket->pages = mem;
1c79356b 1072#if MACH_PAGE_HASH_STATS
b0d623f7
A
1073 if (++bucket->cur_count > bucket->hi_count)
1074 bucket->hi_count = bucket->cur_count;
1c79356b 1075#endif /* MACH_PAGE_HASH_STATS */
1c79356b 1076
b0d623f7
A
1077 lck_spin_unlock(bucket_lock);
1078 }
1c79356b
A
1079 /*
1080 * Now link into the object's list of backed pages.
1081 */
1082
91447636 1083 VM_PAGE_INSERT(mem, object);
1c79356b
A
1084 mem->tabled = TRUE;
1085
1086 /*
1087 * Show that the object has one more resident page.
1088 */
1089
1090 object->resident_page_count++;
b0d623f7
A
1091 if (VM_PAGE_WIRED(mem)) {
1092 object->wired_page_count++;
1093 }
1094 assert(object->resident_page_count >= object->wired_page_count);
91447636 1095
b0d623f7 1096 assert(!mem->reusable);
2d21ac55 1097
b0d623f7
A
1098 if (object->purgable == VM_PURGABLE_VOLATILE) {
1099 if (VM_PAGE_WIRED(mem)) {
1100 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1101 } else {
1102 OSAddAtomic(1, &vm_page_purgeable_count);
1103 }
593a1d5f
A
1104 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1105 mem->throttled) {
b0d623f7
A
1106 /*
1107 * This page belongs to a purged VM object but hasn't
1108 * been purged (because it was "busy").
1109 * It's in the "throttled" queue and hence not
1110 * visible to vm_pageout_scan(). Move it to a pageable
1111 * queue, so that it can eventually be reclaimed, instead
1112 * of lingering in the "empty" object.
1113 */
593a1d5f 1114 if (queues_lock_held == FALSE)
b0d623f7 1115 vm_page_lockspin_queues();
593a1d5f 1116 vm_page_deactivate(mem);
2d21ac55
A
1117 if (queues_lock_held == FALSE)
1118 vm_page_unlock_queues();
91447636 1119 }
1c79356b
A
1120}
1121
1122/*
1123 * vm_page_replace:
1124 *
1125 * Exactly like vm_page_insert, except that we first
1126 * remove any existing page at the given offset in object.
1127 *
b0d623f7 1128 * The object must be locked.
1c79356b 1129 */
1c79356b
A
1130void
1131vm_page_replace(
1132 register vm_page_t mem,
1133 register vm_object_t object,
1134 register vm_object_offset_t offset)
1135{
0c530ab8
A
1136 vm_page_bucket_t *bucket;
1137 vm_page_t found_m = VM_PAGE_NULL;
b0d623f7
A
1138 lck_spin_t *bucket_lock;
1139 int hash_id;
1c79356b
A
1140
1141 VM_PAGE_CHECK(mem);
2d21ac55 1142 vm_object_lock_assert_exclusive(object);
91447636 1143#if DEBUG
91447636
A
1144 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1145 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1146 "already in (obj=%p,off=0x%llx)",
1147 mem, object, offset, mem->object, mem->offset);
b0d623f7 1148 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
91447636 1149#endif
1c79356b
A
1150 /*
1151 * Record the object/offset pair in this page
1152 */
1153
1154 mem->object = object;
1155 mem->offset = offset;
1156
1157 /*
1158 * Insert it into the object_object/offset hash table,
1159 * replacing any page that might have been there.
1160 */
1161
b0d623f7
A
1162 hash_id = vm_page_hash(object, offset);
1163 bucket = &vm_page_buckets[hash_id];
1164 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1165
1166 lck_spin_lock(bucket_lock);
0c530ab8 1167
1c79356b
A
1168 if (bucket->pages) {
1169 vm_page_t *mp = &bucket->pages;
b0d623f7 1170 vm_page_t m = *mp;
0c530ab8 1171
1c79356b
A
1172 do {
1173 if (m->object == object && m->offset == offset) {
1174 /*
0c530ab8 1175 * Remove old page from hash list
1c79356b
A
1176 */
1177 *mp = m->next;
1c79356b 1178
0c530ab8 1179 found_m = m;
1c79356b
A
1180 break;
1181 }
1182 mp = &m->next;
91447636 1183 } while ((m = *mp));
0c530ab8 1184
1c79356b
A
1185 mem->next = bucket->pages;
1186 } else {
1187 mem->next = VM_PAGE_NULL;
1188 }
0c530ab8
A
1189 /*
1190 * insert new page at head of hash list
1191 */
1c79356b 1192 bucket->pages = mem;
0c530ab8 1193
b0d623f7 1194 lck_spin_unlock(bucket_lock);
1c79356b 1195
0c530ab8
A
1196 if (found_m) {
1197 /*
1198 * there was already a page at the specified
1199 * offset for this object... remove it from
1200 * the object and free it back to the free list
1201 */
b0d623f7 1202 vm_page_free_unlocked(found_m, FALSE);
91447636 1203 }
b0d623f7 1204 vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
1c79356b
A
1205}
1206
1207/*
1208 * vm_page_remove: [ internal use only ]
1209 *
1210 * Removes the given mem entry from the object/offset-page
1211 * table and the object page list.
1212 *
b0d623f7 1213 * The object must be locked.
1c79356b
A
1214 */
1215
1216void
1217vm_page_remove(
b0d623f7
A
1218 vm_page_t mem,
1219 boolean_t remove_from_hash)
1c79356b 1220{
b0d623f7
A
1221 vm_page_bucket_t *bucket;
1222 vm_page_t this;
1223 lck_spin_t *bucket_lock;
1224 int hash_id;
1c79356b
A
1225
1226 XPR(XPR_VM_PAGE,
1227 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7
A
1228 mem->object, mem->offset,
1229 mem, 0,0);
1230
2d21ac55 1231 vm_object_lock_assert_exclusive(mem->object);
1c79356b
A
1232 assert(mem->tabled);
1233 assert(!mem->cleaning);
1234 VM_PAGE_CHECK(mem);
1235
b0d623f7
A
1236 if (remove_from_hash == TRUE) {
1237 /*
1238 * Remove from the object_object/offset hash table
1239 */
1240 hash_id = vm_page_hash(mem->object, mem->offset);
1241 bucket = &vm_page_buckets[hash_id];
1242 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
91447636 1243
b0d623f7 1244 lck_spin_lock(bucket_lock);
1c79356b 1245
b0d623f7
A
1246 if ((this = bucket->pages) == mem) {
1247 /* optimize for common case */
1c79356b 1248
b0d623f7
A
1249 bucket->pages = mem->next;
1250 } else {
1251 vm_page_t *prev;
1c79356b 1252
b0d623f7
A
1253 for (prev = &this->next;
1254 (this = *prev) != mem;
1255 prev = &this->next)
1256 continue;
1257 *prev = this->next;
1258 }
1c79356b 1259#if MACH_PAGE_HASH_STATS
b0d623f7 1260 bucket->cur_count--;
1c79356b 1261#endif /* MACH_PAGE_HASH_STATS */
1c79356b 1262
b0d623f7
A
1263 lck_spin_unlock(bucket_lock);
1264 }
1c79356b
A
1265 /*
1266 * Now remove from the object's list of backed pages.
1267 */
1268
91447636 1269 VM_PAGE_REMOVE(mem);
1c79356b
A
1270
1271 /*
1272 * And show that the object has one fewer resident
1273 * page.
1274 */
1275
b0d623f7 1276 assert(mem->object->resident_page_count > 0);
1c79356b 1277 mem->object->resident_page_count--;
b0d623f7
A
1278 if (VM_PAGE_WIRED(mem)) {
1279 assert(mem->object->wired_page_count > 0);
1280 mem->object->wired_page_count--;
1281 }
1282 assert(mem->object->resident_page_count >=
1283 mem->object->wired_page_count);
1284 if (mem->reusable) {
1285 assert(mem->object->reusable_page_count > 0);
1286 mem->object->reusable_page_count--;
1287 assert(mem->object->reusable_page_count <=
1288 mem->object->resident_page_count);
1289 mem->reusable = FALSE;
1290 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1291 vm_page_stats_reusable.reused_remove++;
1292 } else if (mem->object->all_reusable) {
1293 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1294 vm_page_stats_reusable.reused_remove++;
1295 }
1c79356b 1296
593a1d5f 1297 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
b0d623f7
A
1298 if (VM_PAGE_WIRED(mem)) {
1299 assert(vm_page_purgeable_wired_count > 0);
1300 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1301 } else {
1302 assert(vm_page_purgeable_count > 0);
1303 OSAddAtomic(-1, &vm_page_purgeable_count);
1304 }
91447636 1305 }
1c79356b
A
1306 mem->tabled = FALSE;
1307 mem->object = VM_OBJECT_NULL;
91447636 1308 mem->offset = (vm_object_offset_t) -1;
1c79356b
A
1309}
1310
b0d623f7 1311
1c79356b
A
1312/*
1313 * vm_page_lookup:
1314 *
1315 * Returns the page associated with the object/offset
1316 * pair specified; if none is found, VM_PAGE_NULL is returned.
1317 *
1318 * The object must be locked. No side effects.
1319 */
1320
91447636
A
1321unsigned long vm_page_lookup_hint = 0;
1322unsigned long vm_page_lookup_hint_next = 0;
1323unsigned long vm_page_lookup_hint_prev = 0;
1324unsigned long vm_page_lookup_hint_miss = 0;
2d21ac55
A
1325unsigned long vm_page_lookup_bucket_NULL = 0;
1326unsigned long vm_page_lookup_miss = 0;
1327
91447636 1328
1c79356b
A
1329vm_page_t
1330vm_page_lookup(
b0d623f7
A
1331 vm_object_t object,
1332 vm_object_offset_t offset)
1c79356b 1333{
b0d623f7
A
1334 vm_page_t mem;
1335 vm_page_bucket_t *bucket;
1336 queue_entry_t qe;
1337 lck_spin_t *bucket_lock;
1338 int hash_id;
91447636 1339
2d21ac55 1340 vm_object_lock_assert_held(object);
91447636 1341 mem = object->memq_hint;
2d21ac55 1342
91447636
A
1343 if (mem != VM_PAGE_NULL) {
1344 assert(mem->object == object);
2d21ac55 1345
91447636
A
1346 if (mem->offset == offset) {
1347 vm_page_lookup_hint++;
1348 return mem;
1349 }
1350 qe = queue_next(&mem->listq);
2d21ac55 1351
91447636
A
1352 if (! queue_end(&object->memq, qe)) {
1353 vm_page_t next_page;
1354
1355 next_page = (vm_page_t) qe;
1356 assert(next_page->object == object);
2d21ac55 1357
91447636
A
1358 if (next_page->offset == offset) {
1359 vm_page_lookup_hint_next++;
1360 object->memq_hint = next_page; /* new hint */
1361 return next_page;
1362 }
1363 }
1364 qe = queue_prev(&mem->listq);
2d21ac55 1365
91447636
A
1366 if (! queue_end(&object->memq, qe)) {
1367 vm_page_t prev_page;
1368
1369 prev_page = (vm_page_t) qe;
1370 assert(prev_page->object == object);
2d21ac55 1371
91447636
A
1372 if (prev_page->offset == offset) {
1373 vm_page_lookup_hint_prev++;
1374 object->memq_hint = prev_page; /* new hint */
1375 return prev_page;
1376 }
1377 }
1378 }
1c79356b 1379 /*
2d21ac55 1380 * Search the hash table for this object/offset pair
1c79356b 1381 */
b0d623f7
A
1382 hash_id = vm_page_hash(object, offset);
1383 bucket = &vm_page_buckets[hash_id];
1c79356b 1384
2d21ac55
A
1385 /*
1386 * since we hold the object lock, we are guaranteed that no
1387 * new pages can be inserted into this object... this in turn
1388 * guarantess that the page we're looking for can't exist
1389 * if the bucket it hashes to is currently NULL even when looked
1390 * at outside the scope of the hash bucket lock... this is a
1391 * really cheap optimiztion to avoid taking the lock
1392 */
1393 if (bucket->pages == VM_PAGE_NULL) {
1394 vm_page_lookup_bucket_NULL++;
1395
1396 return (VM_PAGE_NULL);
1397 }
b0d623f7
A
1398 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1399
1400 lck_spin_lock(bucket_lock);
0c530ab8 1401
1c79356b
A
1402 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1403 VM_PAGE_CHECK(mem);
1404 if ((mem->object == object) && (mem->offset == offset))
1405 break;
1406 }
b0d623f7 1407 lck_spin_unlock(bucket_lock);
55e303ae 1408
91447636
A
1409 if (mem != VM_PAGE_NULL) {
1410 if (object->memq_hint != VM_PAGE_NULL) {
1411 vm_page_lookup_hint_miss++;
1412 }
1413 assert(mem->object == object);
1414 object->memq_hint = mem;
2d21ac55
A
1415 } else
1416 vm_page_lookup_miss++;
91447636
A
1417
1418 return(mem);
1419}
1420
1421
1c79356b
A
1422/*
1423 * vm_page_rename:
1424 *
1425 * Move the given memory entry from its
1426 * current object to the specified target object/offset.
1427 *
1428 * The object must be locked.
1429 */
1430void
1431vm_page_rename(
1432 register vm_page_t mem,
1433 register vm_object_t new_object,
2d21ac55
A
1434 vm_object_offset_t new_offset,
1435 boolean_t encrypted_ok)
1c79356b
A
1436{
1437 assert(mem->object != new_object);
2d21ac55 1438
91447636
A
1439 /*
1440 * ENCRYPTED SWAP:
1441 * The encryption key is based on the page's memory object
1442 * (aka "pager") and paging offset. Moving the page to
1443 * another VM object changes its "pager" and "paging_offset"
2d21ac55
A
1444 * so it has to be decrypted first, or we would lose the key.
1445 *
1446 * One exception is VM object collapsing, where we transfer pages
1447 * from one backing object to its parent object. This operation also
1448 * transfers the paging information, so the <pager,paging_offset> info
1449 * should remain consistent. The caller (vm_object_do_collapse())
1450 * sets "encrypted_ok" in this case.
91447636 1451 */
2d21ac55 1452 if (!encrypted_ok && mem->encrypted) {
91447636
A
1453 panic("vm_page_rename: page %p is encrypted\n", mem);
1454 }
2d21ac55 1455
b0d623f7
A
1456 XPR(XPR_VM_PAGE,
1457 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1458 new_object, new_offset,
1459 mem, 0,0);
1460
1c79356b
A
1461 /*
1462 * Changes to mem->object require the page lock because
1463 * the pageout daemon uses that lock to get the object.
1464 */
b0d623f7 1465 vm_page_lockspin_queues();
1c79356b 1466
b0d623f7
A
1467 vm_page_remove(mem, TRUE);
1468 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);
1c79356b 1469
1c79356b
A
1470 vm_page_unlock_queues();
1471}
1472
1473/*
1474 * vm_page_init:
1475 *
1476 * Initialize the fields in a new page.
1477 * This takes a structure with random values and initializes it
1478 * so that it can be given to vm_page_release or vm_page_insert.
1479 */
1480void
1481vm_page_init(
1482 vm_page_t mem,
55e303ae 1483 ppnum_t phys_page)
1c79356b 1484{
91447636 1485 assert(phys_page);
1c79356b 1486 *mem = vm_page_template;
55e303ae 1487 mem->phys_page = phys_page;
1c79356b
A
1488}
1489
1490/*
1491 * vm_page_grab_fictitious:
1492 *
1493 * Remove a fictitious page from the free list.
1494 * Returns VM_PAGE_NULL if there are no free pages.
1495 */
1496int c_vm_page_grab_fictitious = 0;
1497int c_vm_page_release_fictitious = 0;
1498int c_vm_page_more_fictitious = 0;
1499
b0d623f7 1500extern vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
2d21ac55 1501
1c79356b 1502vm_page_t
2d21ac55 1503vm_page_grab_fictitious_common(
b0d623f7 1504 ppnum_t phys_addr)
1c79356b
A
1505{
1506 register vm_page_t m;
1507
1508 m = (vm_page_t)zget(vm_page_zone);
1509 if (m) {
2d21ac55 1510 vm_page_init(m, phys_addr);
1c79356b 1511 m->fictitious = TRUE;
1c79356b
A
1512 }
1513
1514 c_vm_page_grab_fictitious++;
1515 return m;
1516}
1517
2d21ac55
A
1518vm_page_t
1519vm_page_grab_fictitious(void)
1520{
1521 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1522}
1523
1524vm_page_t
1525vm_page_grab_guard(void)
1526{
1527 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1528}
1529
1c79356b
A
1530/*
1531 * vm_page_release_fictitious:
1532 *
1533 * Release a fictitious page to the free list.
1534 */
1535
1536void
1537vm_page_release_fictitious(
1538 register vm_page_t m)
1539{
1540 assert(!m->free);
1541 assert(m->busy);
1542 assert(m->fictitious);
2d21ac55
A
1543 assert(m->phys_page == vm_page_fictitious_addr ||
1544 m->phys_page == vm_page_guard_addr);
1c79356b
A
1545
1546 c_vm_page_release_fictitious++;
91447636 1547#if DEBUG
1c79356b
A
1548 if (m->free)
1549 panic("vm_page_release_fictitious");
91447636 1550#endif
1c79356b 1551 m->free = TRUE;
91447636 1552 zfree(vm_page_zone, m);
1c79356b
A
1553}
1554
1555/*
1556 * vm_page_more_fictitious:
1557 *
1558 * Add more fictitious pages to the free list.
1559 * Allowed to block. This routine is way intimate
1560 * with the zones code, for several reasons:
1561 * 1. we need to carve some page structures out of physical
1562 * memory before zones work, so they _cannot_ come from
1563 * the zone_map.
1564 * 2. the zone needs to be collectable in order to prevent
1565 * growth without bound. These structures are used by
1566 * the device pager (by the hundreds and thousands), as
1567 * private pages for pageout, and as blocking pages for
1568 * pagein. Temporary bursts in demand should not result in
1569 * permanent allocation of a resource.
1570 * 3. To smooth allocation humps, we allocate single pages
1571 * with kernel_memory_allocate(), and cram them into the
1572 * zone. This also allows us to initialize the vm_page_t's
1573 * on the way into the zone, so that zget() always returns
1574 * an initialized structure. The zone free element pointer
1575 * and the free page pointer are both the first item in the
1576 * vm_page_t.
1577 * 4. By having the pages in the zone pre-initialized, we need
1578 * not keep 2 levels of lists. The garbage collector simply
1579 * scans our list, and reduces physical memory usage as it
1580 * sees fit.
1581 */
1582
1583void vm_page_more_fictitious(void)
1584{
1c79356b
A
1585 register vm_page_t m;
1586 vm_offset_t addr;
1587 kern_return_t retval;
1588 int i;
1589
1590 c_vm_page_more_fictitious++;
1591
1c79356b
A
1592 /*
1593 * Allocate a single page from the zone_map. Do not wait if no physical
1594 * pages are immediately available, and do not zero the space. We need
1595 * our own blocking lock here to prevent having multiple,
1596 * simultaneous requests from piling up on the zone_map lock. Exactly
1597 * one (of our) threads should be potentially waiting on the map lock.
1598 * If winner is not vm-privileged, then the page allocation will fail,
1599 * and it will temporarily block here in the vm_page_wait().
1600 */
b0d623f7 1601 lck_mtx_lock(&vm_page_alloc_lock);
1c79356b
A
1602 /*
1603 * If another thread allocated space, just bail out now.
1604 */
1605 if (zone_free_count(vm_page_zone) > 5) {
1606 /*
1607 * The number "5" is a small number that is larger than the
1608 * number of fictitious pages that any single caller will
1609 * attempt to allocate. Otherwise, a thread will attempt to
1610 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1611 * release all of the resources and locks already acquired,
1612 * and then call this routine. This routine finds the pages
1613 * that the caller released, so fails to allocate new space.
1614 * The process repeats infinitely. The largest known number
1615 * of fictitious pages required in this manner is 2. 5 is
1616 * simply a somewhat larger number.
1617 */
b0d623f7 1618 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
1619 return;
1620 }
1621
91447636
A
1622 retval = kernel_memory_allocate(zone_map,
1623 &addr, PAGE_SIZE, VM_PROT_ALL,
1624 KMA_KOBJECT|KMA_NOPAGEWAIT);
1625 if (retval != KERN_SUCCESS) {
1c79356b
A
1626 /*
1627 * No page was available. Tell the pageout daemon, drop the
1628 * lock to give another thread a chance at it, and
1629 * wait for the pageout daemon to make progress.
1630 */
b0d623f7 1631 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
1632 vm_page_wait(THREAD_UNINT);
1633 return;
1634 }
1635 /*
1636 * Initialize as many vm_page_t's as will fit on this page. This
1637 * depends on the zone code disturbing ONLY the first item of
1638 * each zone element.
1639 */
1640 m = (vm_page_t)addr;
1641 for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
1642 vm_page_init(m, vm_page_fictitious_addr);
1643 m->fictitious = TRUE;
1644 m++;
1645 }
91447636 1646 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
b0d623f7 1647 lck_mtx_unlock(&vm_page_alloc_lock);
1c79356b
A
1648}
1649
1c79356b
A
1650
1651/*
1652 * vm_pool_low():
1653 *
1654 * Return true if it is not likely that a non-vm_privileged thread
1655 * can get memory without blocking. Advisory only, since the
1656 * situation may change under us.
1657 */
1658int
1659vm_pool_low(void)
1660{
1661 /* No locking, at worst we will fib. */
b0d623f7 1662 return( vm_page_free_count <= vm_page_free_reserved );
1c79356b
A
1663}
1664
0c530ab8
A
1665
1666
1667/*
1668 * this is an interface to support bring-up of drivers
1669 * on platforms with physical memory > 4G...
1670 */
1671int vm_himemory_mode = 0;
1672
1673
1674/*
1675 * this interface exists to support hardware controllers
1676 * incapable of generating DMAs with more than 32 bits
1677 * of address on platforms with physical memory > 4G...
1678 */
1679unsigned int vm_lopage_free_count = 0;
1680unsigned int vm_lopage_max_count = 0;
2d21ac55 1681queue_head_t vm_lopage_queue_free;
0c530ab8
A
1682
1683vm_page_t
1684vm_page_grablo(void)
1685{
1686 register vm_page_t mem;
1687 unsigned int vm_lopage_alloc_count;
1688
1689 if (vm_lopage_poolsize == 0)
1690 return (vm_page_grab());
1691
b0d623f7 1692 lck_mtx_lock_spin(&vm_page_queue_free_lock);
0c530ab8 1693
2d21ac55
A
1694 if (! queue_empty(&vm_lopage_queue_free)) {
1695 queue_remove_first(&vm_lopage_queue_free,
1696 mem,
1697 vm_page_t,
1698 pageq);
1699 assert(mem->free);
1700 assert(mem->busy);
1701 assert(!mem->pmapped);
4a3eedf9 1702 assert(!mem->wpmapped);
0c530ab8 1703
0c530ab8
A
1704 mem->pageq.next = NULL;
1705 mem->pageq.prev = NULL;
1706 mem->free = FALSE;
0c530ab8
A
1707
1708 vm_lopage_free_count--;
1709 vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count;
1710 if (vm_lopage_alloc_count > vm_lopage_max_count)
1711 vm_lopage_max_count = vm_lopage_alloc_count;
2d21ac55
A
1712 } else {
1713 mem = VM_PAGE_NULL;
0c530ab8 1714 }
b0d623f7 1715 lck_mtx_unlock(&vm_page_queue_free_lock);
0c530ab8
A
1716
1717 return (mem);
1718}
1719
1720
1c79356b
A
1721/*
1722 * vm_page_grab:
1723 *
2d21ac55
A
1724 * first try to grab a page from the per-cpu free list...
1725 * this must be done while pre-emption is disabled... if
1726 * a page is available, we're done...
1727 * if no page is available, grab the vm_page_queue_free_lock
1728 * and see if current number of free pages would allow us
1729 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1730 * if there are pages available, disable preemption and
1731 * recheck the state of the per-cpu free list... we could
1732 * have been preempted and moved to a different cpu, or
1733 * some other thread could have re-filled it... if still
1734 * empty, figure out how many pages we can steal from the
1735 * global free queue and move to the per-cpu queue...
1736 * return 1 of these pages when done... only wakeup the
1737 * pageout_scan thread if we moved pages from the global
1738 * list... no need for the wakeup if we've satisfied the
1739 * request from the per-cpu queue.
1c79356b
A
1740 */
1741
2d21ac55
A
1742#define COLOR_GROUPS_TO_STEAL 4
1743
1c79356b
A
1744
1745vm_page_t
2d21ac55 1746vm_page_grab( void )
1c79356b 1747{
2d21ac55
A
1748 vm_page_t mem;
1749
1750
1751 disable_preemption();
1752
1753 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1754return_page_from_cpu_list:
1755 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1756 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1757 mem->pageq.next = NULL;
1758
1759 enable_preemption();
1760
1761 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1762 assert(mem->tabled == FALSE);
1763 assert(mem->object == VM_OBJECT_NULL);
1764 assert(!mem->laundry);
1765 assert(!mem->free);
1766 assert(pmap_verify_free(mem->phys_page));
1767 assert(mem->busy);
1768 assert(!mem->encrypted);
1769 assert(!mem->pmapped);
4a3eedf9 1770 assert(!mem->wpmapped);
2d21ac55
A
1771
1772 return mem;
1773 }
1774 enable_preemption();
1775
1c79356b 1776
1c79356b
A
1777 /*
1778 * Optionally produce warnings if the wire or gobble
1779 * counts exceed some threshold.
1780 */
1781 if (vm_page_wire_count_warning > 0
1782 && vm_page_wire_count >= vm_page_wire_count_warning) {
1783 printf("mk: vm_page_grab(): high wired page count of %d\n",
1784 vm_page_wire_count);
1785 assert(vm_page_wire_count < vm_page_wire_count_warning);
1786 }
1787 if (vm_page_gobble_count_warning > 0
1788 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1789 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1790 vm_page_gobble_count);
1791 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1792 }
1793
b0d623f7
A
1794 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1795
1c79356b
A
1796 /*
1797 * Only let privileged threads (involved in pageout)
1798 * dip into the reserved pool.
1799 */
1c79356b 1800 if ((vm_page_free_count < vm_page_free_reserved) &&
91447636 1801 !(current_thread()->options & TH_OPT_VMPRIV)) {
b0d623f7 1802 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 1803 mem = VM_PAGE_NULL;
1c79356b 1804 }
2d21ac55
A
1805 else {
1806 vm_page_t head;
1807 vm_page_t tail;
1808 unsigned int pages_to_steal;
1809 unsigned int color;
1c79356b 1810
2d21ac55 1811 while ( vm_page_free_count == 0 ) {
1c79356b 1812
b0d623f7 1813 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
1814 /*
1815 * must be a privileged thread to be
1816 * in this state since a non-privileged
1817 * thread would have bailed if we were
1818 * under the vm_page_free_reserved mark
1819 */
1820 VM_PAGE_WAIT();
b0d623f7 1821 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
1822 }
1823
1824 disable_preemption();
1825
1826 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
b0d623f7 1827 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
1828
1829 /*
1830 * we got preempted and moved to another processor
1831 * or we got preempted and someone else ran and filled the cache
1832 */
1833 goto return_page_from_cpu_list;
1834 }
1835 if (vm_page_free_count <= vm_page_free_reserved)
1836 pages_to_steal = 1;
1837 else {
1838 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1839
1840 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1841 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1842 }
1843 color = PROCESSOR_DATA(current_processor(), start_color);
1844 head = tail = NULL;
1845
1846 while (pages_to_steal--) {
1847 if (--vm_page_free_count < vm_page_free_count_minimum)
1848 vm_page_free_count_minimum = vm_page_free_count;
1849
1850 while (queue_empty(&vm_page_queue_free[color]))
1851 color = (color + 1) & vm_color_mask;
1852
1853 queue_remove_first(&vm_page_queue_free[color],
1854 mem,
1855 vm_page_t,
1856 pageq);
1857 mem->pageq.next = NULL;
1858 mem->pageq.prev = NULL;
1859
1860 color = (color + 1) & vm_color_mask;
1861
1862 if (head == NULL)
1863 head = mem;
1864 else
1865 tail->pageq.next = (queue_t)mem;
1866 tail = mem;
1867
1868 mem->pageq.prev = NULL;
1869 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1870 assert(mem->tabled == FALSE);
1871 assert(mem->object == VM_OBJECT_NULL);
1872 assert(!mem->laundry);
1873 assert(mem->free);
1874 mem->free = FALSE;
1875
1876 assert(pmap_verify_free(mem->phys_page));
1877 assert(mem->busy);
1878 assert(!mem->free);
1879 assert(!mem->encrypted);
1880 assert(!mem->pmapped);
4a3eedf9 1881 assert(!mem->wpmapped);
2d21ac55
A
1882 }
1883 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1884 PROCESSOR_DATA(current_processor(), start_color) = color;
1885
1886 /*
1887 * satisfy this request
1888 */
1889 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1890 mem = head;
1891 mem->pageq.next = NULL;
91447636 1892
b0d623f7 1893 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
1894
1895 enable_preemption();
1896 }
1c79356b
A
1897 /*
1898 * Decide if we should poke the pageout daemon.
1899 * We do this if the free count is less than the low
1900 * water mark, or if the free count is less than the high
1901 * water mark (but above the low water mark) and the inactive
1902 * count is less than its target.
1903 *
1904 * We don't have the counts locked ... if they change a little,
1905 * it doesn't really matter.
1906 */
1c79356b
A
1907 if ((vm_page_free_count < vm_page_free_min) ||
1908 ((vm_page_free_count < vm_page_free_target) &&
2d21ac55
A
1909 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1910 thread_wakeup((event_t) &vm_page_free_wanted);
1911
1912#if CONFIG_EMBEDDED
1913 {
1914 int percent_avail;
1915
1916 /*
1917 * Decide if we need to poke the memorystatus notification thread.
1918 */
1919 percent_avail =
1920 (vm_page_active_count + vm_page_inactive_count +
1921 vm_page_speculative_count + vm_page_free_count +
cf7d32b8 1922 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2d21ac55
A
1923 atop_64(max_mem);
1924 if (percent_avail <= (kern_memorystatus_level - 5)) {
1925 kern_memorystatus_level = percent_avail;
1926 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1927 }
1928 }
1929#endif
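	/*
	 * Worked example (illustrative, not in the original source): with
	 * max_mem = 512MB, atop_64(max_mem) is 131072 4K pages.  If the
	 * active + inactive + speculative + free counts sum to 32768 pages
	 * (and a default pager is registered, so purgeable pages are not
	 * added in), percent_avail = 32768 * 100 / 131072 = 25, and the
	 * memorystatus thread is poked only if 25 <= kern_memorystatus_level - 5.
	 */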
1c79356b 1930
55e303ae 1931// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1c79356b
A
1932
1933 return mem;
1934}
1935
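/*
 * Minimal usage sketch, not part of the original file: callers that
 * must have a page typically loop around vm_page_wait(), the same
 * pattern vm_page_part_zero_fill() uses further down in this file.
 */
static vm_page_t
example_grab_page_blocking(void)
{
	vm_page_t	m;

	for (;;) {
		m = vm_page_grab();
		if (m != VM_PAGE_NULL)
			return (m);		/* got a page */
		vm_page_wait(THREAD_UNINT);	/* block until pages are freed */
	}
}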
1936/*
1937 * vm_page_release:
1938 *
1939 * Return a page to the free list.
1940 */
1941
1942void
1943vm_page_release(
1944 register vm_page_t mem)
1945{
2d21ac55 1946 unsigned int color;
b0d623f7
A
1947 int need_wakeup = 0;
1948 int need_priv_wakeup = 0;
55e303ae
A
1949#if 0
1950 unsigned int pindex;
1951 phys_entry *physent;
1952
1953 physent = mapping_phys_lookup(mem->phys_page, &pindex); /* (BRINGUP) */
1954 if(physent->ppLink & ppN) { /* (BRINGUP) */
1955 panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
1956 }
1957 physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */
1958#endif
1c79356b 1959 assert(!mem->private && !mem->fictitious);
b0d623f7
A
1960 if (vm_page_free_verify) {
1961 assert(pmap_verify_free(mem->phys_page));
1962 }
55e303ae 1963// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1c79356b 1964
b0d623f7
A
1965
1966 lck_mtx_lock_spin(&vm_page_queue_free_lock);
91447636 1967#if DEBUG
1c79356b
A
1968 if (mem->free)
1969 panic("vm_page_release");
91447636 1970#endif
1c79356b 1971 mem->free = TRUE;
2d21ac55
A
1972
1973 assert(mem->busy);
91447636
A
1974 assert(!mem->laundry);
1975 assert(mem->object == VM_OBJECT_NULL);
1976 assert(mem->pageq.next == NULL &&
1977 mem->pageq.prev == NULL);
2d21ac55
A
1978 assert(mem->listq.next == NULL &&
1979 mem->listq.prev == NULL);
1980
0c530ab8
A
1981 if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
1982 /*
1983 * this exists to support hardware controllers
1984 * incapable of generating DMAs with more than 32 bits
1985 * of address on platforms with physical memory > 4G...
1986 */
2d21ac55
A
1987 queue_enter_first(&vm_lopage_queue_free,
1988 mem,
1989 vm_page_t,
1990 pageq);
0c530ab8
A
1991 vm_lopage_free_count++;
1992 } else {
2d21ac55
A
1993 color = mem->phys_page & vm_color_mask;
1994 queue_enter_first(&vm_page_queue_free[color],
1995 mem,
1996 vm_page_t,
1997 pageq);
0c530ab8
A
1998 vm_page_free_count++;
1999 /*
 2000		 * Check if we should wake up someone waiting for a page.
2001 * But don't bother waking them unless they can allocate.
2002 *
2003 * We wakeup only one thread, to prevent starvation.
2004 * Because the scheduling system handles wait queues FIFO,
 2005		 * if we wake up all waiting threads, one greedy thread
 2006		 * can starve multiple nice-guy threads. When the threads
 2007		 * all wake up, the greedy thread runs first, grabs the page,
2008 * and waits for another page. It will be the first to run
2009 * when the next page is freed.
2010 *
2011 * However, there is a slight danger here.
2012 * The thread we wake might not use the free page.
2013 * Then the other threads could wait indefinitely
2014 * while the page goes unused. To forestall this,
2015 * the pageout daemon will keep making free pages
2016 * as long as vm_page_free_wanted is non-zero.
2017 */
1c79356b 2018
b0d623f7
A
2019 assert(vm_page_free_count > 0);
2020 if (vm_page_free_wanted_privileged > 0) {
2d21ac55 2021 vm_page_free_wanted_privileged--;
b0d623f7
A
2022 need_priv_wakeup = 1;
2023 } else if (vm_page_free_wanted > 0 &&
2024 vm_page_free_count > vm_page_free_reserved) {
0c530ab8 2025 vm_page_free_wanted--;
b0d623f7 2026 need_wakeup = 1;
0c530ab8 2027 }
1c79356b 2028 }
b0d623f7
A
2029 lck_mtx_unlock(&vm_page_queue_free_lock);
2030
2031 if (need_priv_wakeup)
2032 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2033 else if (need_wakeup)
2034 thread_wakeup_one((event_t) &vm_page_free_count);
2d21ac55
A
2035
2036#if CONFIG_EMBEDDED
2037 {
2038 int percent_avail;
2039
2040 /*
2041 * Decide if we need to poke the memorystatus notification thread.
2042 * Locking is not a big issue, as only a single thread delivers these.
2043 */
2044 percent_avail =
2045 (vm_page_active_count + vm_page_inactive_count +
2046 vm_page_speculative_count + vm_page_free_count +
cf7d32b8 2047 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2d21ac55
A
2048 atop_64(max_mem);
2049 if (percent_avail >= (kern_memorystatus_level + 5)) {
2050 kern_memorystatus_level = percent_avail;
2051 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2052 }
2053 }
2054#endif
1c79356b
A
2055}
2056
1c79356b
A
2057/*
2058 * vm_page_wait:
2059 *
2060 * Wait for a page to become available.
2061 * If there are plenty of free pages, then we don't sleep.
2062 *
2063 * Returns:
2064 * TRUE: There may be another page, try again
2065 * FALSE: We were interrupted out of our wait, don't try again
2066 */
2067
2068boolean_t
2069vm_page_wait(
2070 int interruptible )
2071{
2072 /*
2073 * We can't use vm_page_free_reserved to make this
2074 * determination. Consider: some thread might
2075 * need to allocate two pages. The first allocation
2076 * succeeds, the second fails. After the first page is freed,
2077 * a call to vm_page_wait must really block.
2078 */
9bccf70c 2079 kern_return_t wait_result;
9bccf70c 2080 int need_wakeup = 0;
2d21ac55 2081 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
1c79356b 2082
b0d623f7 2083 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2d21ac55
A
2084
2085 if (is_privileged && vm_page_free_count) {
b0d623f7 2086 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2087 return TRUE;
2088 }
1c79356b 2089 if (vm_page_free_count < vm_page_free_target) {
2d21ac55
A
2090
2091 if (is_privileged) {
2092 if (vm_page_free_wanted_privileged++ == 0)
2093 need_wakeup = 1;
2094 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2095 } else {
2096 if (vm_page_free_wanted++ == 0)
2097 need_wakeup = 1;
2098 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2099 }
b0d623f7 2100 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b 2101 counter(c_vm_page_wait_block++);
0b4e3aa0
A
2102
2103 if (need_wakeup)
2104 thread_wakeup((event_t)&vm_page_free_wanted);
9bccf70c 2105
91447636 2106 if (wait_result == THREAD_WAITING)
9bccf70c
A
2107 wait_result = thread_block(THREAD_CONTINUE_NULL);
2108
1c79356b
A
2109 return(wait_result == THREAD_AWAKENED);
2110 } else {
b0d623f7 2111 lck_mtx_unlock(&vm_page_queue_free_lock);
1c79356b
A
2112 return TRUE;
2113 }
2114}
2115
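/*
 * Sketch of a hypothetical interruptible caller (not from the original
 * source): the boolean result separates "woken up, try the allocation
 * again" (TRUE) from "interrupted out of the wait" (FALSE).
 */
static kern_return_t
example_grab_page_interruptible(vm_page_t *mp)
{
	vm_page_t	m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
			return (KERN_ABORTED);	/* interrupted; caller backs out */
	}
	*mp = m;
	return (KERN_SUCCESS);
}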
2116/*
2117 * vm_page_alloc:
2118 *
2119 * Allocate and return a memory cell associated
2120 * with this VM object/offset pair.
2121 *
2122 * Object must be locked.
2123 */
2124
2125vm_page_t
2126vm_page_alloc(
2127 vm_object_t object,
2128 vm_object_offset_t offset)
2129{
2130 register vm_page_t mem;
2131
2d21ac55 2132 vm_object_lock_assert_exclusive(object);
1c79356b
A
2133 mem = vm_page_grab();
2134 if (mem == VM_PAGE_NULL)
2135 return VM_PAGE_NULL;
2136
2137 vm_page_insert(mem, object, offset);
2138
2139 return(mem);
2140}
2141
0c530ab8
A
2142vm_page_t
2143vm_page_alloclo(
2144 vm_object_t object,
2145 vm_object_offset_t offset)
2146{
2147 register vm_page_t mem;
2148
2d21ac55 2149 vm_object_lock_assert_exclusive(object);
0c530ab8
A
2150 mem = vm_page_grablo();
2151 if (mem == VM_PAGE_NULL)
2152 return VM_PAGE_NULL;
2153
2154 vm_page_insert(mem, object, offset);
2155
2156 return(mem);
2157}
2158
2159
2d21ac55
A
2160/*
2161 * vm_page_alloc_guard:
2162 *
b0d623f7 2163 * Allocate a fictitious page which will be used
2d21ac55
A
2164 * as a guard page. The page will be inserted into
2165 * the object and returned to the caller.
2166 */
2167
2168vm_page_t
2169vm_page_alloc_guard(
2170 vm_object_t object,
2171 vm_object_offset_t offset)
2172{
2173 register vm_page_t mem;
2174
2175 vm_object_lock_assert_exclusive(object);
2176 mem = vm_page_grab_guard();
2177 if (mem == VM_PAGE_NULL)
2178 return VM_PAGE_NULL;
2179
2180 vm_page_insert(mem, object, offset);
2181
2182 return(mem);
2183}
2184
2185
1c79356b
A
2186counter(unsigned int c_laundry_pages_freed = 0;)
2187
1c79356b
A
2188/*
2189 * vm_page_free:
2190 *
2191 * Returns the given page to the free list,
 2192 *	disassociating it from any VM object.
2193 *
2194 * Object and page queues must be locked prior to entry.
2195 */
b0d623f7 2196static void
2d21ac55 2197vm_page_free_prepare(
1c79356b 2198 register vm_page_t mem)
b0d623f7
A
2199{
2200 vm_page_free_prepare_queues(mem);
2201 vm_page_free_prepare_object(mem, TRUE);
2202}
2203
2204
2205void
2206vm_page_free_prepare_queues(
2207 vm_page_t mem)
1c79356b 2208{
2d21ac55 2209 VM_PAGE_CHECK(mem);
1c79356b
A
2210 assert(!mem->free);
2211 assert(!mem->cleaning);
2212 assert(!mem->pageout);
2d21ac55 2213#if DEBUG
b0d623f7 2214 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2215 if (mem->free)
b0d623f7 2216 panic("vm_page_free: freeing page on free list\n");
91447636 2217#endif
b0d623f7
A
2218 if (mem->object) {
2219 vm_object_lock_assert_exclusive(mem->object);
2220 }
2d21ac55
A
2221
2222 if (mem->laundry) {
2223 /*
2224 * We may have to free a page while it's being laundered
2225 * if we lost its pager (due to a forced unmount, for example).
2226 * We need to call vm_pageout_throttle_up() before removing
2227 * the page from its VM object, so that we can find out on
b0d623f7 2228 * which pageout queue the page is on.
2d21ac55
A
2229 */
2230 vm_pageout_throttle_up(mem);
2231 counter(++c_laundry_pages_freed);
2232 }
b0d623f7
A
2233 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2234
2235 if (VM_PAGE_WIRED(mem)) {
2236 if (mem->object) {
2237 assert(mem->object->wired_page_count > 0);
2238 mem->object->wired_page_count--;
2239 assert(mem->object->resident_page_count >=
2240 mem->object->wired_page_count);
2241 }
1c79356b
A
2242 if (!mem->private && !mem->fictitious)
2243 vm_page_wire_count--;
2244 mem->wire_count = 0;
2245 assert(!mem->gobbled);
2246 } else if (mem->gobbled) {
2247 if (!mem->private && !mem->fictitious)
2248 vm_page_wire_count--;
2249 vm_page_gobble_count--;
2250 }
b0d623f7
A
2251}
2252
2253
2254void
2255vm_page_free_prepare_object(
2256 vm_page_t mem,
2257 boolean_t remove_from_hash)
2258{
2259 if (mem->object) {
2260 vm_object_lock_assert_exclusive(mem->object);
2261 }
1c79356b 2262
b0d623f7
A
2263 if (mem->tabled)
2264 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
1c79356b 2265
b0d623f7 2266 PAGE_WAKEUP(mem); /* clears wanted */
1c79356b
A
2267
2268 if (mem->private) {
2269 mem->private = FALSE;
2270 mem->fictitious = TRUE;
55e303ae 2271 mem->phys_page = vm_page_fictitious_addr;
1c79356b 2272 }
b0d623f7
A
2273 if (mem->fictitious) {
2274 /* Some of these may be unnecessary */
2275 mem->gobbled = FALSE;
2276 mem->busy = TRUE;
2277 mem->absent = FALSE;
2278 mem->error = FALSE;
2279 mem->dirty = FALSE;
2280 mem->precious = FALSE;
2281 mem->reference = FALSE;
2282 mem->encrypted = FALSE;
2283 mem->encrypted_cleaning = FALSE;
2284 mem->pmapped = FALSE;
2285 mem->wpmapped = FALSE;
2286 mem->reusable = FALSE;
2287 } else {
2288 if (mem->zero_fill == TRUE)
2289 VM_ZF_COUNT_DECR();
55e303ae 2290 vm_page_init(mem, mem->phys_page);
1c79356b
A
2291 }
2292}
2293
b0d623f7 2294
2d21ac55
A
2295void
2296vm_page_free(
2297 vm_page_t mem)
2298{
b0d623f7
A
2299 vm_page_free_prepare(mem);
2300 if (mem->fictitious) {
2301 vm_page_release_fictitious(mem);
2302 } else {
2303 vm_page_release(mem);
2304 }
2305}
2306
2307
2308void
2309vm_page_free_unlocked(
2310 vm_page_t mem,
2311 boolean_t remove_from_hash)
2312{
2313 vm_page_lockspin_queues();
2314 vm_page_free_prepare_queues(mem);
2315 vm_page_unlock_queues();
2316
2317 vm_page_free_prepare_object(mem, remove_from_hash);
2318
2d21ac55
A
2319 if (mem->fictitious) {
2320 vm_page_release_fictitious(mem);
2321 } else {
2322 vm_page_release(mem);
2323 }
2324}
55e303ae 2325
2d21ac55
A
2326/*
2327 * Free a list of pages. The list can be up to several hundred pages,
2328 * as blocked up by vm_pageout_scan().
b0d623f7 2329 * The big win is not having to take the free list lock once
2d21ac55
A
2330 * per page. We sort the incoming pages into n lists, one for
2331 * each color.
2d21ac55 2332 */
55e303ae
A
2333void
2334vm_page_free_list(
b0d623f7
A
2335 vm_page_t mem,
2336 boolean_t prepare_object)
55e303ae 2337{
2d21ac55
A
2338 vm_page_t nxt;
2339 int pg_count = 0;
2340 int color;
2341 int inuse_list_head = -1;
2342
2343 queue_head_t free_list[MAX_COLORS];
2344 int inuse[MAX_COLORS];
55e303ae 2345
2d21ac55
A
2346 for (color = 0; color < (signed) vm_colors; color++) {
2347 queue_init(&free_list[color]);
2348 }
2349
55e303ae 2350 while (mem) {
b0d623f7
A
2351 assert(!mem->inactive);
2352 assert(!mem->active);
2353 assert(!mem->throttled);
2354 assert(!mem->free);
2355 assert(!mem->speculative);
2356 assert(mem->pageq.prev == NULL);
2357
2358 nxt = (vm_page_t)(mem->pageq.next);
2359
2360 if (prepare_object == TRUE)
2361 vm_page_free_prepare_object(mem, TRUE);
2362
2d21ac55
A
2363 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2364 assert(pmap_verify_free(mem->phys_page));
2365 }
2d21ac55 2366 assert(mem->busy);
55e303ae 2367
55e303ae 2368 if (!mem->fictitious) {
935ed37a
A
2369 if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
2370 mem->pageq.next = NULL;
2371 vm_page_release(mem);
2372 } else {
935ed37a 2373
b0d623f7
A
2374 /*
2375 * IMPORTANT: we can't set the page "free" here
2376 * because that would make the page eligible for
2377 * a physically-contiguous allocation (see
2378 * vm_page_find_contiguous()) right away (we don't
2379 * hold the vm_page_queue_free lock). That would
2380 * cause trouble because the page is not actually
2381 * in the free queue yet...
2382 */
935ed37a
A
2383 color = mem->phys_page & vm_color_mask;
2384 if (queue_empty(&free_list[color])) {
2385 inuse[color] = inuse_list_head;
2386 inuse_list_head = color;
2387 }
2388 queue_enter_first(&free_list[color],
2389 mem,
2390 vm_page_t,
2391 pageq);
2392 pg_count++;
2d21ac55 2393 }
55e303ae 2394 } else {
2d21ac55
A
2395 assert(mem->phys_page == vm_page_fictitious_addr ||
2396 mem->phys_page == vm_page_guard_addr);
55e303ae
A
2397 vm_page_release_fictitious(mem);
2398 }
2399 mem = nxt;
2400 }
2d21ac55
A
2401 if (pg_count) {
2402 unsigned int avail_free_count;
b0d623f7
A
2403 unsigned int need_wakeup = 0;
2404 unsigned int need_priv_wakeup = 0;
2d21ac55 2405
b0d623f7 2406 lck_mtx_lock_spin(&vm_page_queue_free_lock);
55e303ae 2407
2d21ac55
A
2408 color = inuse_list_head;
2409
2410 while( color != -1 ) {
2411 vm_page_t first, last;
2412 vm_page_t first_free;
2413
b0d623f7
A
2414 /*
2415 * Now that we hold the vm_page_queue_free lock,
2416 * it's safe to mark all pages in our local queue
2417 * as "free"...
2418 */
2419 queue_iterate(&free_list[color],
2420 mem,
2421 vm_page_t,
2422 pageq) {
2423 assert(!mem->free);
2424 assert(mem->busy);
2425 mem->free = TRUE;
2426 }
2427
2428 /*
2429 * ... and insert our local queue at the head of
2430 * the global free queue.
2431 */
2d21ac55
A
2432 first = (vm_page_t) queue_first(&free_list[color]);
2433 last = (vm_page_t) queue_last(&free_list[color]);
2434 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2d21ac55
A
2435 if (queue_empty(&vm_page_queue_free[color])) {
2436 queue_last(&vm_page_queue_free[color]) =
2437 (queue_entry_t) last;
2438 } else {
2439 queue_prev(&first_free->pageq) =
2440 (queue_entry_t) last;
2441 }
2442 queue_first(&vm_page_queue_free[color]) =
2443 (queue_entry_t) first;
2444 queue_prev(&first->pageq) =
2445 (queue_entry_t) &vm_page_queue_free[color];
2446 queue_next(&last->pageq) =
2447 (queue_entry_t) first_free;
b0d623f7
A
2448
2449 /* next color */
2d21ac55
A
2450 color = inuse[color];
2451 }
2452
55e303ae 2453 vm_page_free_count += pg_count;
2d21ac55
A
2454 avail_free_count = vm_page_free_count;
2455
b0d623f7
A
2456 if (vm_page_free_wanted_privileged > 0 &&
2457 avail_free_count > 0) {
2458 if (avail_free_count < vm_page_free_wanted_privileged) {
2459 need_priv_wakeup = avail_free_count;
2460 vm_page_free_wanted_privileged -=
2461 avail_free_count;
2462 avail_free_count = 0;
2463 } else {
2464 need_priv_wakeup = vm_page_free_wanted_privileged;
2465 vm_page_free_wanted_privileged = 0;
2466 avail_free_count -=
2467 vm_page_free_wanted_privileged;
2468 }
2d21ac55 2469 }
55e303ae 2470
b0d623f7
A
2471 if (vm_page_free_wanted > 0 &&
2472 avail_free_count > vm_page_free_reserved) {
91447636 2473 unsigned int available_pages;
55e303ae 2474
b0d623f7
A
2475 available_pages = (avail_free_count -
2476 vm_page_free_reserved);
55e303ae
A
2477
2478 if (available_pages >= vm_page_free_wanted) {
b0d623f7 2479 need_wakeup = vm_page_free_wanted;
55e303ae 2480 vm_page_free_wanted = 0;
55e303ae 2481 } else {
b0d623f7
A
2482 need_wakeup = available_pages;
2483 vm_page_free_wanted -= available_pages;
55e303ae
A
2484 }
2485 }
b0d623f7 2486 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 2487
b0d623f7
A
2488 if (need_priv_wakeup != 0) {
2489 /*
2490 * There shouldn't be that many VM-privileged threads,
2491 * so let's wake them all up, even if we don't quite
2492 * have enough pages to satisfy them all.
2493 */
2494 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2495 }
2496 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2497 /*
2498 * We don't expect to have any more waiters
2499 * after this, so let's wake them all up at
2500 * once.
2501 */
2502 thread_wakeup((event_t) &vm_page_free_count);
2503 } else for (; need_wakeup != 0; need_wakeup--) {
2504 /*
2505 * Wake up one waiter per page we just released.
2506 */
2507 thread_wakeup_one((event_t) &vm_page_free_count);
2508 }
2d21ac55
A
2509#if CONFIG_EMBEDDED
2510 {
2511 int percent_avail;
2512
2513 /*
2514 * Decide if we need to poke the memorystatus notification thread.
2515 */
2516 percent_avail =
2517 (vm_page_active_count + vm_page_inactive_count +
2518 vm_page_speculative_count + vm_page_free_count +
cf7d32b8 2519 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2d21ac55
A
2520 atop_64(max_mem);
2521 if (percent_avail >= (kern_memorystatus_level + 5)) {
2522 kern_memorystatus_level = percent_avail;
2523 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2524 }
2525 }
2526#endif
55e303ae
A
2527 }
2528}
2529
2530
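/*
 * Note on "color" (descriptive comment, not in the original source):
 * the free queues above are indexed by the low-order bits of the
 * physical page number, i.e. color = m->phys_page & vm_color_mask,
 * with vm_color_mask == vm_colors - 1.  Physically consecutive pages
 * therefore land in consecutive buckets, which spreads allocations
 * across processor cache sets.
 */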
1c79356b
A
2531/*
2532 * vm_page_wire:
2533 *
2534 * Mark this page as wired down by yet
2535 * another map, removing it from paging queues
2536 * as necessary.
2537 *
2538 * The page's object and the page queues must be locked.
2539 */
2540void
2541vm_page_wire(
2542 register vm_page_t mem)
2543{
2544
91447636 2545// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
1c79356b
A
2546
2547 VM_PAGE_CHECK(mem);
b0d623f7
A
2548 if (mem->object) {
2549 vm_object_lock_assert_exclusive(mem->object);
2550 } else {
2551 /*
2552 * In theory, the page should be in an object before it
2553 * gets wired, since we need to hold the object lock
2554 * to update some fields in the page structure.
2555 * However, some code (i386 pmap, for example) might want
2556 * to wire a page before it gets inserted into an object.
2557 * That's somewhat OK, as long as nobody else can get to
2558 * that page and update it at the same time.
2559 */
2560 }
91447636 2561#if DEBUG
b0d623f7 2562 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2563#endif
b0d623f7 2564 if ( !VM_PAGE_WIRED(mem)) {
1c79356b 2565 VM_PAGE_QUEUES_REMOVE(mem);
b0d623f7
A
2566
2567 if (mem->object) {
2568 mem->object->wired_page_count++;
2569 assert(mem->object->resident_page_count >=
2570 mem->object->wired_page_count);
2571 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2572 assert(vm_page_purgeable_count > 0);
2573 OSAddAtomic(-1, &vm_page_purgeable_count);
2574 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2575 }
2576 if (mem->object->all_reusable) {
2577 /*
2578 * Wired pages are not counted as "re-usable"
2579 * in "all_reusable" VM objects, so nothing
2580 * to do here.
2581 */
2582 } else if (mem->reusable) {
2583 /*
2584 * This page is not "re-usable" when it's
2585 * wired, so adjust its state and the
2586 * accounting.
2587 */
2588 vm_object_reuse_pages(mem->object,
2589 mem->offset,
2590 mem->offset+PAGE_SIZE_64,
2591 FALSE);
2592 }
2593 }
2594 assert(!mem->reusable);
2595
1c79356b
A
2596 if (!mem->private && !mem->fictitious && !mem->gobbled)
2597 vm_page_wire_count++;
2598 if (mem->gobbled)
2599 vm_page_gobble_count--;
2600 mem->gobbled = FALSE;
2d21ac55 2601 if (mem->zero_fill == TRUE) {
9bccf70c 2602 mem->zero_fill = FALSE;
b0d623f7 2603 VM_ZF_COUNT_DECR();
9bccf70c 2604 }
593a1d5f
A
2605#if CONFIG_EMBEDDED
2606 {
2607 int percent_avail;
2608
2609 /*
2610 * Decide if we need to poke the memorystatus notification thread.
2611 */
2612 percent_avail =
2613 (vm_page_active_count + vm_page_inactive_count +
2614 vm_page_speculative_count + vm_page_free_count +
2615 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2616 atop_64(max_mem);
2617 if (percent_avail <= (kern_memorystatus_level - 5)) {
2618 kern_memorystatus_level = percent_avail;
2619 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2620 }
2621 }
2622#endif
91447636
A
2623 /*
2624 * ENCRYPTED SWAP:
2625 * The page could be encrypted, but
 2626	 * we don't have to decrypt it here
2627 * because we don't guarantee that the
2628 * data is actually valid at this point.
2629 * The page will get decrypted in
2630 * vm_fault_wire() if needed.
2631 */
1c79356b
A
2632 }
2633 assert(!mem->gobbled);
2634 mem->wire_count++;
b0d623f7 2635 VM_PAGE_CHECK(mem);
1c79356b
A
2636}
2637
2638/*
2639 * vm_page_gobble:
2640 *
2641 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2642 *
2643 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2644 */
2645void
2646vm_page_gobble(
2647 register vm_page_t mem)
2648{
2d21ac55 2649 vm_page_lockspin_queues();
1c79356b
A
2650 VM_PAGE_CHECK(mem);
2651
2652 assert(!mem->gobbled);
b0d623f7 2653 assert( !VM_PAGE_WIRED(mem));
1c79356b 2654
b0d623f7 2655 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
1c79356b
A
2656 if (!mem->private && !mem->fictitious)
2657 vm_page_wire_count++;
2658 }
2659 vm_page_gobble_count++;
2660 mem->gobbled = TRUE;
2661 vm_page_unlock_queues();
2662}
2663
2664/*
2665 * vm_page_unwire:
2666 *
2667 * Release one wiring of this page, potentially
2668 * enabling it to be paged again.
2669 *
2670 * The page's object and the page queues must be locked.
2671 */
2672void
2673vm_page_unwire(
2674 register vm_page_t mem)
2675{
2676
91447636 2677// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
1c79356b
A
2678
2679 VM_PAGE_CHECK(mem);
b0d623f7
A
2680 assert(VM_PAGE_WIRED(mem));
2681 assert(mem->object != VM_OBJECT_NULL);
91447636 2682#if DEBUG
b0d623f7
A
2683 vm_object_lock_assert_exclusive(mem->object);
2684 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2685#endif
1c79356b
A
2686 if (--mem->wire_count == 0) {
2687 assert(!mem->private && !mem->fictitious);
2688 vm_page_wire_count--;
b0d623f7
A
2689 assert(mem->object->wired_page_count > 0);
2690 mem->object->wired_page_count--;
2691 assert(mem->object->resident_page_count >=
2692 mem->object->wired_page_count);
2693 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2694 OSAddAtomic(+1, &vm_page_purgeable_count);
2695 assert(vm_page_purgeable_wired_count > 0);
2696 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2697 }
91447636
A
2698 assert(!mem->laundry);
2699 assert(mem->object != kernel_object);
2700 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
593a1d5f
A
2701 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2702 vm_page_deactivate(mem);
2d21ac55 2703 } else {
593a1d5f 2704 vm_page_activate(mem);
2d21ac55 2705 }
593a1d5f
A
2706#if CONFIG_EMBEDDED
2707 {
2708 int percent_avail;
2709
2710 /*
2711 * Decide if we need to poke the memorystatus notification thread.
2712 */
2713 percent_avail =
2714 (vm_page_active_count + vm_page_inactive_count +
2715 vm_page_speculative_count + vm_page_free_count +
2716 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2717 atop_64(max_mem);
2718 if (percent_avail >= (kern_memorystatus_level + 5)) {
2719 kern_memorystatus_level = percent_avail;
2720 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2721 }
2722 }
2723#endif
1c79356b 2724 }
b0d623f7 2725 VM_PAGE_CHECK(mem);
1c79356b
A
2726}
2727
2728/*
2729 * vm_page_deactivate:
2730 *
2731 * Returns the given page to the inactive list,
2732 * indicating that no physical maps have access
2733 * to this page. [Used by the physical mapping system.]
2734 *
2735 * The page queues must be locked.
2736 */
2737void
2738vm_page_deactivate(
b0d623f7
A
2739 vm_page_t m)
2740{
2741 vm_page_deactivate_internal(m, TRUE);
2742}
2743
2744
2745void
2746vm_page_deactivate_internal(
2747 vm_page_t m,
2748 boolean_t clear_hw_reference)
1c79356b 2749{
2d21ac55 2750
1c79356b 2751 VM_PAGE_CHECK(m);
91447636 2752 assert(m->object != kernel_object);
2d21ac55 2753 assert(m->phys_page != vm_page_guard_addr);
1c79356b 2754
55e303ae 2755// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
91447636 2756#if DEBUG
b0d623f7 2757 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2758#endif
1c79356b
A
2759 /*
2760 * This page is no longer very interesting. If it was
2761 * interesting (active or inactive/referenced), then we
2762 * clear the reference bit and (re)enter it in the
2763 * inactive queue. Note wired pages should not have
2764 * their reference bit cleared.
2765 */
2766 if (m->gobbled) { /* can this happen? */
b0d623f7 2767 assert( !VM_PAGE_WIRED(m));
2d21ac55 2768
1c79356b
A
2769 if (!m->private && !m->fictitious)
2770 vm_page_wire_count--;
2771 vm_page_gobble_count--;
2772 m->gobbled = FALSE;
2773 }
b0d623f7 2774 if (m->private || (VM_PAGE_WIRED(m)))
1c79356b 2775 return;
2d21ac55 2776
b0d623f7 2777 if (!m->fictitious && !m->absent && clear_hw_reference == TRUE)
2d21ac55
A
2778 pmap_clear_reference(m->phys_page);
2779
2780 m->reference = FALSE;
2d21ac55
A
2781 m->no_cache = FALSE;
2782
2783 if (!m->inactive) {
2784 VM_PAGE_QUEUES_REMOVE(m);
0b4e3aa0 2785
91447636
A
2786 assert(!m->laundry);
2787 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2d21ac55
A
2788
2789 if (!IP_VALID(memory_manager_default) &&
d1ecb069
A
2790 m->dirty && m->object->internal &&
2791 (m->object->purgable == VM_PURGABLE_DENY ||
2792 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2793 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2d21ac55
A
2794 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2795 m->throttled = TRUE;
2796 vm_page_throttled_count++;
9bccf70c 2797 } else {
b0d623f7 2798 if (!m->fictitious && m->object->named && m->object->ref_count == 1) {
2d21ac55 2799 vm_page_speculate(m, FALSE);
b0d623f7 2800#if DEVELOPMENT || DEBUG
2d21ac55 2801 vm_page_speculative_recreated++;
b0d623f7 2802#endif
2d21ac55
A
2803 return;
2804 } else {
2805 if (m->zero_fill) {
2806 queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
2807 vm_zf_queue_count++;
2808 } else {
2809 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2810 }
2811 }
2812 m->inactive = TRUE;
2813 if (!m->fictitious) {
2814 vm_page_inactive_count++;
2815 token_new_pagecount++;
2816 }
9bccf70c 2817 }
1c79356b
A
2818 }
2819}
2820
2821/*
2822 * vm_page_activate:
2823 *
2824 * Put the specified page on the active list (if appropriate).
2825 *
2826 * The page queues must be locked.
2827 */
2828
2829void
2830vm_page_activate(
2831 register vm_page_t m)
2832{
2833 VM_PAGE_CHECK(m);
2d21ac55 2834#ifdef FIXME_4778297
91447636 2835 assert(m->object != kernel_object);
2d21ac55
A
2836#endif
2837 assert(m->phys_page != vm_page_guard_addr);
91447636 2838#if DEBUG
b0d623f7 2839 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2840#endif
1c79356b 2841 if (m->gobbled) {
b0d623f7 2842 assert( !VM_PAGE_WIRED(m));
1c79356b
A
2843 if (!m->private && !m->fictitious)
2844 vm_page_wire_count--;
2845 vm_page_gobble_count--;
2846 m->gobbled = FALSE;
2847 }
2848 if (m->private)
2849 return;
2850
2d21ac55
A
2851#if DEBUG
2852 if (m->active)
2853 panic("vm_page_activate: already active");
2854#endif
2855
2856 if (m->speculative) {
2857 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2858 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2859 }
2860
2861 VM_PAGE_QUEUES_REMOVE(m);
2862
b0d623f7 2863 if ( !VM_PAGE_WIRED(m)) {
91447636 2864 assert(!m->laundry);
2d21ac55
A
2865 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2866 if (!IP_VALID(memory_manager_default) &&
d1ecb069
A
2867 !m->fictitious && m->dirty && m->object->internal &&
2868 (m->object->purgable == VM_PURGABLE_DENY ||
2869 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2870 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2d21ac55
A
2871 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2872 m->throttled = TRUE;
2873 vm_page_throttled_count++;
9bccf70c 2874 } else {
2d21ac55
A
2875 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2876 m->active = TRUE;
2877 if (!m->fictitious)
2878 vm_page_active_count++;
9bccf70c 2879 }
2d21ac55
A
2880 m->reference = TRUE;
2881 m->no_cache = FALSE;
1c79356b 2882 }
b0d623f7 2883 VM_PAGE_CHECK(m);
2d21ac55
A
2884}
2885
2886
2887/*
2888 * vm_page_speculate:
2889 *
2890 * Put the specified page on the speculative list (if appropriate).
2891 *
2892 * The page queues must be locked.
2893 */
2894void
2895vm_page_speculate(
2896 vm_page_t m,
2897 boolean_t new)
2898{
2899 struct vm_speculative_age_q *aq;
2900
2901 VM_PAGE_CHECK(m);
2902 assert(m->object != kernel_object);
2d21ac55 2903 assert(m->phys_page != vm_page_guard_addr);
91447636 2904#if DEBUG
b0d623f7 2905 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
91447636 2906#endif
b0d623f7
A
2907
2908 VM_PAGE_QUEUES_REMOVE(m);
2909
2910 if ( !VM_PAGE_WIRED(m)) {
2d21ac55 2911 mach_timespec_t ts;
b0d623f7
A
2912 clock_sec_t sec;
2913 clock_nsec_t nsec;
2d21ac55 2914
b0d623f7
A
2915 clock_get_system_nanotime(&sec, &nsec);
2916 ts.tv_sec = (unsigned int) sec;
2917 ts.tv_nsec = nsec;
2d21ac55
A
2918
2919 if (vm_page_speculative_count == 0) {
2920
2921 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2922 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2923
2924 aq = &vm_page_queue_speculative[speculative_age_index];
2925
2926 /*
2927 * set the timer to begin a new group
2928 */
2929 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2930 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2931
2932 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2933 } else {
2934 aq = &vm_page_queue_speculative[speculative_age_index];
2935
2936 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2937
2938 speculative_age_index++;
2939
2940 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2941 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2942 if (speculative_age_index == speculative_steal_index) {
2943 speculative_steal_index = speculative_age_index + 1;
2944
2945 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2946 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2947 }
2948 aq = &vm_page_queue_speculative[speculative_age_index];
2949
2950 if (!queue_empty(&aq->age_q))
2951 vm_page_speculate_ageit(aq);
2952
2953 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2954 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2955
2956 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2957 }
2958 }
2959 enqueue_tail(&aq->age_q, &m->pageq);
2960 m->speculative = TRUE;
2961 vm_page_speculative_count++;
2962
2963 if (new == TRUE) {
2964 m->object->pages_created++;
b0d623f7 2965#if DEVELOPMENT || DEBUG
2d21ac55 2966 vm_page_speculative_created++;
b0d623f7 2967#endif
2d21ac55
A
2968 }
2969 }
b0d623f7 2970 VM_PAGE_CHECK(m);
2d21ac55
A
2971}
2972
2973
2974/*
2975 * move pages from the specified aging bin to
2976 * the speculative bin that pageout_scan claims from
2977 *
2978 * The page queues must be locked.
2979 */
2980void
2981vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2982{
2983 struct vm_speculative_age_q *sq;
2984 vm_page_t t;
2985
2986 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2987
2988 if (queue_empty(&sq->age_q)) {
2989 sq->age_q.next = aq->age_q.next;
2990 sq->age_q.prev = aq->age_q.prev;
2991
2992 t = (vm_page_t)sq->age_q.next;
2993 t->pageq.prev = &sq->age_q;
2994
2995 t = (vm_page_t)sq->age_q.prev;
2996 t->pageq.next = &sq->age_q;
2997 } else {
2998 t = (vm_page_t)sq->age_q.prev;
2999 t->pageq.next = aq->age_q.next;
3000
3001 t = (vm_page_t)aq->age_q.next;
3002 t->pageq.prev = sq->age_q.prev;
3003
3004 t = (vm_page_t)aq->age_q.prev;
3005 t->pageq.next = &sq->age_q;
3006
3007 sq->age_q.prev = aq->age_q.prev;
1c79356b 3008 }
2d21ac55
A
3009 queue_init(&aq->age_q);
3010}
3011
3012
3013void
3014vm_page_lru(
3015 vm_page_t m)
3016{
3017 VM_PAGE_CHECK(m);
3018 assert(m->object != kernel_object);
3019 assert(m->phys_page != vm_page_guard_addr);
3020
3021#if DEBUG
b0d623f7 3022 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
3023#endif
3024 if (m->active || m->reference)
3025 return;
3026
b0d623f7 3027 if (m->private || (VM_PAGE_WIRED(m)))
2d21ac55
A
3028 return;
3029
3030 m->no_cache = FALSE;
3031
3032 VM_PAGE_QUEUES_REMOVE(m);
3033
3034 assert(!m->laundry);
3035 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
3036
3037 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
3038 m->inactive = TRUE;
3039
3040 vm_page_inactive_count++;
3041 token_new_pagecount++;
1c79356b
A
3042}
3043
2d21ac55 3044
b0d623f7
A
3045void
3046vm_page_reactivate_all_throttled(void)
3047{
3048 vm_page_t first_throttled, last_throttled;
3049 vm_page_t first_active;
3050 vm_page_t m;
3051 int extra_active_count;
3052
3053 extra_active_count = 0;
3054 vm_page_lock_queues();
3055 if (! queue_empty(&vm_page_queue_throttled)) {
3056 /*
3057 * Switch "throttled" pages to "active".
3058 */
3059 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3060 VM_PAGE_CHECK(m);
3061 assert(m->throttled);
3062 assert(!m->active);
3063 assert(!m->inactive);
3064 assert(!m->speculative);
3065 assert(!VM_PAGE_WIRED(m));
3066 if (!m->fictitious) {
3067 extra_active_count++;
3068 }
3069 m->throttled = FALSE;
3070 m->active = TRUE;
3071 VM_PAGE_CHECK(m);
3072 }
3073
3074 /*
 3075		 * Transfer the entire throttled queue to the regular LRU page queues.
3076 * We insert it at the head of the active queue, so that these pages
3077 * get re-evaluated by the LRU algorithm first, since they've been
3078 * completely out of it until now.
3079 */
3080 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3081 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3082 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3083 if (queue_empty(&vm_page_queue_active)) {
3084 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3085 } else {
3086 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3087 }
3088 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3089 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3090 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3091
3092#if DEBUG
3093 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3094#endif
3095 queue_init(&vm_page_queue_throttled);
3096 /*
3097 * Adjust the global page counts.
3098 */
3099 vm_page_active_count += extra_active_count;
3100 vm_page_throttled_count = 0;
3101 }
3102 assert(vm_page_throttled_count == 0);
3103 assert(queue_empty(&vm_page_queue_throttled));
3104 vm_page_unlock_queues();
3105}
3106
3107
3108/*
3109 * move pages from the indicated local queue to the global active queue
 3110 * it's ok to fail if we're below the hard limit and force == FALSE
3111 * the nolocks == TRUE case is to allow this function to be run on
3112 * the hibernate path
3113 */
3114
3115void
3116vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3117{
3118 struct vpl *lq;
3119 vm_page_t first_local, last_local;
3120 vm_page_t first_active;
3121 vm_page_t m;
3122 uint32_t count = 0;
3123
3124 if (vm_page_local_q == NULL)
3125 return;
3126
3127 lq = &vm_page_local_q[lid].vpl_un.vpl;
3128
3129 if (nolocks == FALSE) {
3130 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3131 if ( !vm_page_trylockspin_queues())
3132 return;
3133 } else
3134 vm_page_lockspin_queues();
3135
3136 VPL_LOCK(&lq->vpl_lock);
3137 }
3138 if (lq->vpl_count) {
3139 /*
3140 * Switch "local" pages to "active".
3141 */
3142 assert(!queue_empty(&lq->vpl_queue));
3143
3144 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3145 VM_PAGE_CHECK(m);
3146 assert(m->local);
3147 assert(!m->active);
3148 assert(!m->inactive);
3149 assert(!m->speculative);
3150 assert(!VM_PAGE_WIRED(m));
3151 assert(!m->throttled);
3152 assert(!m->fictitious);
3153
3154 if (m->local_id != lid)
3155 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3156
3157 m->local_id = 0;
3158 m->local = FALSE;
3159 m->active = TRUE;
3160 VM_PAGE_CHECK(m);
3161
3162 count++;
3163 }
3164 if (count != lq->vpl_count)
3165 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3166
3167 /*
 3168		 * Transfer the entire local queue to the regular LRU page queues.
3169 */
3170 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3171 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3172 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3173
3174 if (queue_empty(&vm_page_queue_active)) {
3175 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3176 } else {
3177 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3178 }
3179 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3180 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3181 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3182
3183 queue_init(&lq->vpl_queue);
3184 /*
3185 * Adjust the global page counts.
3186 */
3187 vm_page_active_count += lq->vpl_count;
3188 lq->vpl_count = 0;
3189 }
3190 assert(queue_empty(&lq->vpl_queue));
3191
3192 if (nolocks == FALSE) {
3193 VPL_UNLOCK(&lq->vpl_lock);
3194 vm_page_unlock_queues();
3195 }
3196}
3197
1c79356b
A
3198/*
3199 * vm_page_part_zero_fill:
3200 *
3201 * Zero-fill a part of the page.
3202 */
3203void
3204vm_page_part_zero_fill(
3205 vm_page_t m,
3206 vm_offset_t m_pa,
3207 vm_size_t len)
3208{
3209 vm_page_t tmp;
3210
3211 VM_PAGE_CHECK(m);
3212#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
55e303ae 3213 pmap_zero_part_page(m->phys_page, m_pa, len);
1c79356b
A
3214#else
3215 while (1) {
3216 tmp = vm_page_grab();
3217 if (tmp == VM_PAGE_NULL) {
3218 vm_page_wait(THREAD_UNINT);
3219 continue;
3220 }
3221 break;
3222 }
3223 vm_page_zero_fill(tmp);
3224 if(m_pa != 0) {
3225 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3226 }
3227 if((m_pa + len) < PAGE_SIZE) {
3228 vm_page_part_copy(m, m_pa + len, tmp,
3229 m_pa + len, PAGE_SIZE - (m_pa + len));
3230 }
3231 vm_page_copy(tmp,m);
b0d623f7 3232 VM_PAGE_FREE(tmp);
1c79356b
A
3233#endif
3234
3235}
3236
3237/*
3238 * vm_page_zero_fill:
3239 *
3240 * Zero-fill the specified page.
3241 */
3242void
3243vm_page_zero_fill(
3244 vm_page_t m)
3245{
3246 XPR(XPR_VM_PAGE,
3247 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
b0d623f7 3248 m->object, m->offset, m, 0,0);
1c79356b
A
3249
3250 VM_PAGE_CHECK(m);
3251
55e303ae
A
3252// dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3253 pmap_zero_page(m->phys_page);
1c79356b
A
3254}
3255
3256/*
3257 * vm_page_part_copy:
3258 *
3259 * copy part of one page to another
3260 */
3261
3262void
3263vm_page_part_copy(
3264 vm_page_t src_m,
3265 vm_offset_t src_pa,
3266 vm_page_t dst_m,
3267 vm_offset_t dst_pa,
3268 vm_size_t len)
3269{
3270 VM_PAGE_CHECK(src_m);
3271 VM_PAGE_CHECK(dst_m);
3272
55e303ae
A
3273 pmap_copy_part_page(src_m->phys_page, src_pa,
3274 dst_m->phys_page, dst_pa, len);
1c79356b
A
3275}
3276
3277/*
3278 * vm_page_copy:
3279 *
3280 * Copy one page to another
91447636
A
3281 *
3282 * ENCRYPTED SWAP:
3283 * The source page should not be encrypted. The caller should
3284 * make sure the page is decrypted first, if necessary.
1c79356b
A
3285 */
3286
2d21ac55
A
3287int vm_page_copy_cs_validations = 0;
3288int vm_page_copy_cs_tainted = 0;
3289
1c79356b
A
3290void
3291vm_page_copy(
3292 vm_page_t src_m,
3293 vm_page_t dest_m)
3294{
3295 XPR(XPR_VM_PAGE,
3296 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
b0d623f7
A
3297 src_m->object, src_m->offset,
3298 dest_m->object, dest_m->offset,
1c79356b
A
3299 0);
3300
3301 VM_PAGE_CHECK(src_m);
3302 VM_PAGE_CHECK(dest_m);
3303
91447636
A
3304 /*
3305 * ENCRYPTED SWAP:
3306 * The source page should not be encrypted at this point.
3307 * The destination page will therefore not contain encrypted
3308 * data after the copy.
3309 */
3310 if (src_m->encrypted) {
3311 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3312 }
3313 dest_m->encrypted = FALSE;
3314
2d21ac55 3315 if (src_m->object != VM_OBJECT_NULL &&
4a3eedf9 3316 src_m->object->code_signed) {
2d21ac55 3317 /*
4a3eedf9 3318 * We're copying a page from a code-signed object.
2d21ac55
A
3319 * Whoever ends up mapping the copy page might care about
3320 * the original page's integrity, so let's validate the
3321 * source page now.
3322 */
3323 vm_page_copy_cs_validations++;
3324 vm_page_validate_cs(src_m);
3325 }
3326 /*
b0d623f7
A
3327 * Propagate the cs_tainted bit to the copy page. Do not propagate
3328 * the cs_validated bit.
2d21ac55 3329 */
2d21ac55
A
3330 dest_m->cs_tainted = src_m->cs_tainted;
3331 if (dest_m->cs_tainted) {
2d21ac55
A
3332 vm_page_copy_cs_tainted++;
3333 }
3334
55e303ae 3335 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
1c79356b
A
3336}
3337
2d21ac55 3338#if MACH_ASSERT
b0d623f7
A
3339static void
3340_vm_page_print(
3341 vm_page_t p)
3342{
3343 printf("vm_page %p: \n", p);
3344 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3345 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3346 printf(" next=%p\n", p->next);
3347 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3348 printf(" wire_count=%u\n", p->wire_count);
3349
3350 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3351 (p->local ? "" : "!"),
3352 (p->inactive ? "" : "!"),
3353 (p->active ? "" : "!"),
3354 (p->pageout_queue ? "" : "!"),
3355 (p->speculative ? "" : "!"),
3356 (p->laundry ? "" : "!"));
3357 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3358 (p->free ? "" : "!"),
3359 (p->reference ? "" : "!"),
3360 (p->gobbled ? "" : "!"),
3361 (p->private ? "" : "!"),
3362 (p->throttled ? "" : "!"));
3363 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3364 (p->busy ? "" : "!"),
3365 (p->wanted ? "" : "!"),
3366 (p->tabled ? "" : "!"),
3367 (p->fictitious ? "" : "!"),
3368 (p->pmapped ? "" : "!"),
3369 (p->wpmapped ? "" : "!"));
3370 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3371 (p->pageout ? "" : "!"),
3372 (p->absent ? "" : "!"),
3373 (p->error ? "" : "!"),
3374 (p->dirty ? "" : "!"),
3375 (p->cleaning ? "" : "!"),
3376 (p->precious ? "" : "!"),
3377 (p->clustered ? "" : "!"));
3378 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3379 (p->overwriting ? "" : "!"),
3380 (p->restart ? "" : "!"),
3381 (p->unusual ? "" : "!"),
3382 (p->encrypted ? "" : "!"),
3383 (p->encrypted_cleaning ? "" : "!"));
3384 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3385 (p->list_req_pending ? "" : "!"),
3386 (p->dump_cleaning ? "" : "!"),
3387 (p->cs_validated ? "" : "!"),
3388 (p->cs_tainted ? "" : "!"),
3389 (p->no_cache ? "" : "!"));
3390 printf(" %szero_fill\n",
3391 (p->zero_fill ? "" : "!"));
3392
3393 printf("phys_page=0x%x\n", p->phys_page);
3394}
3395
1c79356b
A
3396/*
3397 * Check that the list of pages is ordered by
3398 * ascending physical address and has no holes.
3399 */
2d21ac55 3400static int
1c79356b
A
3401vm_page_verify_contiguous(
3402 vm_page_t pages,
3403 unsigned int npages)
3404{
3405 register vm_page_t m;
3406 unsigned int page_count;
91447636 3407 vm_offset_t prev_addr;
1c79356b 3408
55e303ae 3409 prev_addr = pages->phys_page;
1c79356b
A
3410 page_count = 1;
3411 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
55e303ae 3412 if (m->phys_page != prev_addr + 1) {
b0d623f7
A
3413 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3414 m, (long)prev_addr, m->phys_page);
2d21ac55 3415 printf("pages %p page_count %d\n", pages, page_count);
1c79356b
A
3416 panic("vm_page_verify_contiguous: not contiguous!");
3417 }
55e303ae 3418 prev_addr = m->phys_page;
1c79356b
A
3419 ++page_count;
3420 }
3421 if (page_count != npages) {
2d21ac55 3422 printf("pages %p actual count 0x%x but requested 0x%x\n",
1c79356b
A
3423 pages, page_count, npages);
3424 panic("vm_page_verify_contiguous: count error");
3425 }
3426 return 1;
3427}
1c79356b
A
3428
3429
2d21ac55
A
3430/*
3431 * Check the free lists for proper length etc.
3432 */
b0d623f7
A
3433static unsigned int
3434vm_page_verify_free_list(
d1ecb069 3435 queue_head_t *vm_page_queue,
b0d623f7
A
3436 unsigned int color,
3437 vm_page_t look_for_page,
3438 boolean_t expect_page)
3439{
3440 unsigned int npages;
3441 vm_page_t m;
3442 vm_page_t prev_m;
3443 boolean_t found_page;
3444
3445 found_page = FALSE;
3446 npages = 0;
d1ecb069
A
3447 prev_m = (vm_page_t) vm_page_queue;
3448 queue_iterate(vm_page_queue,
b0d623f7
A
3449 m,
3450 vm_page_t,
3451 pageq) {
3452 if (m == look_for_page) {
3453 found_page = TRUE;
3454 }
3455 if ((vm_page_t) m->pageq.prev != prev_m)
3456 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3457 color, npages, m, m->pageq.prev, prev_m);
3458 if ( ! m->free )
3459 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3460 color, npages, m);
3461 if ( ! m->busy )
3462 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3463 color, npages, m);
d1ecb069 3464 if ( color != (unsigned int) -1 && (m->phys_page & vm_color_mask) != color)
b0d623f7
A
3465 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3466 color, npages, m, m->phys_page & vm_color_mask, color);
3467 ++npages;
3468 prev_m = m;
3469 }
3470 if (look_for_page != VM_PAGE_NULL) {
3471 unsigned int other_color;
3472
3473 if (expect_page && !found_page) {
3474 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3475 color, npages, look_for_page, look_for_page->phys_page);
3476 _vm_page_print(look_for_page);
3477 for (other_color = 0;
3478 other_color < vm_colors;
3479 other_color++) {
3480 if (other_color == color)
3481 continue;
d1ecb069
A
3482 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3483 other_color, look_for_page, FALSE);
b0d623f7 3484 }
d1ecb069
A
3485 if (color != (unsigned int) -1) {
3486 vm_page_verify_free_list(&vm_lopage_queue_free,
3487 (unsigned int) -1, look_for_page, FALSE);
3488 }
3489
b0d623f7
A
3490 panic("vm_page_verify_free_list(color=%u)\n", color);
3491 }
3492 if (!expect_page && found_page) {
3493 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3494 color, npages, look_for_page, look_for_page->phys_page);
3495 }
3496 }
3497 return npages;
3498}
3499
3500static boolean_t vm_page_verify_free_lists_enabled = FALSE;
2d21ac55
A
3501static void
3502vm_page_verify_free_lists( void )
3503{
d1ecb069 3504 unsigned int color, npages, nlopages;
b0d623f7
A
3505
3506 if (! vm_page_verify_free_lists_enabled)
3507 return;
3508
2d21ac55 3509 npages = 0;
b0d623f7
A
3510
3511 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
3512
3513 for( color = 0; color < vm_colors; color++ ) {
d1ecb069
A
3514 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3515 color, VM_PAGE_NULL, FALSE);
2d21ac55 3516 }
2d21ac55 3517
d1ecb069
A
3518 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3519 (unsigned int) -1,
3520 VM_PAGE_NULL, FALSE);
3521 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3522 panic("vm_page_verify_free_lists: "
3523 "npages %u free_count %d nlopages %u lo_free_count %u",
3524 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
b0d623f7 3525 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 3526}
2d21ac55 3527
b0d623f7
A
3528void
3529vm_page_queues_assert(
3530 vm_page_t mem,
3531 int val)
3532{
3533 if (mem->free + mem->active + mem->inactive + mem->speculative +
3534 mem->throttled + mem->pageout_queue > (val)) {
3535 _vm_page_print(mem);
3536 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3537 }
3538 if (VM_PAGE_WIRED(mem)) {
3539 assert(!mem->active);
3540 assert(!mem->inactive);
3541 assert(!mem->speculative);
3542 assert(!mem->throttled);
3543 }
3544}
3545#endif /* MACH_ASSERT */
2d21ac55 3546
91447636 3547
1c79356b 3548/*
2d21ac55 3549 * CONTIGUOUS PAGE ALLOCATION
2d21ac55
A
3550 *
3551 * Find a region large enough to contain at least n pages
1c79356b
A
3552 * of contiguous physical memory.
3553 *
2d21ac55
A
3554 * This is done by traversing the vm_page_t array in a linear fashion
 3555 * we assume that the vm_page_t array has the available physical pages in an
3556 * ordered, ascending list... this is currently true of all our implementations
3557 * and must remain so... there can be 'holes' in the array... we also can
3558 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
 3559 * which used to happen via 'vm_page_convert'... that function was no longer
3560 * being called and was removed...
3561 *
3562 * The basic flow consists of stabilizing some of the interesting state of
3563 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 3564 * sweep at the beginning of the array looking for pages that meet our criteria
3565 * for a 'stealable' page... currently we are pretty conservative... if the page
 3566 * meets these criteria and is physically contiguous to the previous page in the 'run'
3567 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3568 * and start to develop a new run... if at this point we've already considered
3569 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3570 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3571 * to other threads trying to acquire free pages (or move pages from q to q),
3572 * and then continue from the spot we left off... we only make 1 pass through the
3573 * array. Once we have a 'run' that is long enough, we'll go into the loop
3574 * which steals the pages from the queues they're currently on... pages on the free
3575 * queue can be stolen directly... pages that are on any of the other queues
3576 * must be removed from the object they are tabled on... this requires taking the
3577 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3578 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3579 * dump the pages we've currently stolen back to the free list, and pick up our
3580 * scan from the point where we aborted the 'current' run.
3581 *
3582 *
1c79356b 3583 * Requirements:
2d21ac55 3584 * - neither vm_page_queue nor vm_free_list lock can be held on entry
1c79356b 3585 *
2d21ac55 3586 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
1c79356b 3587 *
e5568f75 3588 * Algorithm:
1c79356b 3589 */
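/*
 * Illustration only (not part of this file's logic): a minimal, single-pass
 * sketch of the run-development scan described above, stripped of the page
 * state checks, lock handling, yielding, and page stealing.  It walks an
 * ascending array of physical page numbers, grows a run while each page is
 * contiguous with the previous one, and restarts the run otherwise.  The
 * function and parameter names here are hypothetical.
 */
#if 0
static int
example_find_run(const ppnum_t *pnum, unsigned int count,
		 unsigned int want, ppnum_t mask)
{
	unsigned int	i, npages = 0, start = 0;
	ppnum_t		prev = (ppnum_t) -2;	/* never adjacent to a real page */

	for (i = 0; i < count && npages < want; i++) {
		if (pnum[i] != prev + 1) {
			if ((pnum[i] & mask) != 0) {
				/* misaligned: can't start a run here */
				npages = 0;
				prev = (ppnum_t) -2;
				continue;
			}
			/* start a new run at this page */
			npages = 1;
			start = i;
		} else {
			/* physically contiguous: keep developing the run */
			npages++;
		}
		prev = pnum[i];
	}
	return (npages == want) ? (int) start : -1;
}
#endif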
2d21ac55
A
3590
3591#define MAX_CONSIDERED_BEFORE_YIELD 1000
3592
3593
3594#define RESET_STATE_OF_RUN() \
3595 MACRO_BEGIN \
3596 prevcontaddr = -2; \
b0d623f7 3597 start_pnum = -1; \
2d21ac55
A
3598 free_considered = 0; \
3599 substitute_needed = 0; \
3600 npages = 0; \
3601 MACRO_END
3602
b0d623f7
A
3603/*
3604 * Can we steal in-use (i.e. not free) pages when searching for
3605 * physically-contiguous pages ?
3606 */
3607#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3608
3609static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3610#if DEBUG
3611int vm_page_find_contig_debug = 0;
3612#endif
2d21ac55 3613
1c79356b
A
3614static vm_page_t
3615vm_page_find_contiguous(
2d21ac55
A
3616 unsigned int contig_pages,
3617 ppnum_t max_pnum,
b0d623f7
A
3618 ppnum_t pnum_mask,
3619 boolean_t wire,
3620 int flags)
1c79356b 3621{
2d21ac55 3622 vm_page_t m = NULL;
e5568f75 3623 ppnum_t prevcontaddr;
b0d623f7
A
3624 ppnum_t start_pnum;
3625 unsigned int npages, considered, scanned;
3626 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3627 unsigned int idx_last_contig_page_found = 0;
2d21ac55
A
3628 int free_considered, free_available;
3629 int substitute_needed;
b0d623f7 3630 boolean_t wrapped;
593a1d5f 3631#if DEBUG
b0d623f7
A
3632 clock_sec_t tv_start_sec, tv_end_sec;
3633 clock_usec_t tv_start_usec, tv_end_usec;
593a1d5f
A
3634#endif
3635#if MACH_ASSERT
2d21ac55
A
3636 int yielded = 0;
3637 int dumped_run = 0;
3638 int stolen_pages = 0;
91447636 3639#endif
1c79356b 3640
2d21ac55 3641 if (contig_pages == 0)
1c79356b
A
3642 return VM_PAGE_NULL;
3643
2d21ac55
A
3644#if MACH_ASSERT
3645 vm_page_verify_free_lists();
593a1d5f
A
3646#endif
3647#if DEBUG
2d21ac55
A
3648 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3649#endif
3650 vm_page_lock_queues();
b0d623f7 3651 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
3652
3653 RESET_STATE_OF_RUN();
1c79356b 3654
b0d623f7 3655 scanned = 0;
2d21ac55
A
3656 considered = 0;
3657 free_available = vm_page_free_count - vm_page_free_reserved;
e5568f75 3658
b0d623f7
A
3659 wrapped = FALSE;
3660
3661 if(flags & KMA_LOMEM)
3662 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3663 else
3664 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3665
3666 orig_last_idx = idx_last_contig_page_found;
3667 last_idx = orig_last_idx;
3668
3669 for (page_idx = last_idx, start_idx = last_idx;
2d21ac55
A
3670 npages < contig_pages && page_idx < vm_pages_count;
3671 page_idx++) {
b0d623f7
A
3672retry:
3673 if (wrapped &&
3674 npages == 0 &&
3675 page_idx >= orig_last_idx) {
3676 /*
3677 * We're back where we started and we haven't
3678 * found any suitable contiguous range. Let's
3679 * give up.
3680 */
3681 break;
3682 }
3683 scanned++;
2d21ac55 3684 m = &vm_pages[page_idx];
e5568f75 3685
b0d623f7
A
3686 assert(!m->fictitious);
3687 assert(!m->private);
3688
2d21ac55
A
3689 if (max_pnum && m->phys_page > max_pnum) {
3690 /* no more low pages... */
3691 break;
e5568f75 3692 }
b0d623f7 3693 if ( !(flags & KMA_LOMEM) && m->phys_page <= vm_lopage_poolend &&
2d21ac55
A
3694 m->phys_page >= vm_lopage_poolstart) {
3695 /*
3696 * don't want to take pages from our
3697 * reserved pool of low memory
3698 * so don't consider it which
3699 * means starting a new run
3700 */
3701 RESET_STATE_OF_RUN();
e5568f75 3702
d1ecb069 3703 } else if (!npages && ((m->phys_page & pnum_mask) != 0)) {
b0d623f7
A
3704 /*
3705 * not aligned
3706 */
3707 RESET_STATE_OF_RUN();
3708
3709 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
2d21ac55
A
3710 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3711 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
b0d623f7
A
3712 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
3713 m->pageout) {
2d21ac55
A
3714 /*
3715 * page is in a transient state
3716 * or a state we don't want to deal
3717 * with, so don't consider it which
3718 * means starting a new run
3719 */
3720 RESET_STATE_OF_RUN();
1c79356b 3721
2d21ac55
A
3722 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3723 /*
3724 * page needs to be on one of our queues
3725 * in order for it to be stable behind the
3726 * locks we hold at this point...
3727 * if not, don't consider it which
3728 * means starting a new run
3729 */
3730 RESET_STATE_OF_RUN();
3731
3732 } else if (!m->free && (!m->tabled || m->busy)) {
3733 /*
3734 * pages on the free list are always 'busy'
3735 * so we couldn't test for 'busy' in the check
3736 * for the transient states... pages that are
3737 * 'free' are never 'tabled', so we also couldn't
3738 * test for 'tabled'. So we check here to make
3739 * sure that a non-free page is not busy and is
3740 * tabled on an object...
3741 * if not, don't consider it which
3742 * means starting a new run
3743 */
3744 RESET_STATE_OF_RUN();
3745
3746 } else {
3747 if (m->phys_page != prevcontaddr + 1) {
b0d623f7
A
3748 if ((m->phys_page & pnum_mask) != 0) {
3749 RESET_STATE_OF_RUN();
3750 goto did_consider;
3751 } else {
3752 npages = 1;
3753 start_idx = page_idx;
3754 start_pnum = m->phys_page;
3755 }
2d21ac55
A
3756 } else {
3757 npages++;
e5568f75 3758 }
2d21ac55 3759 prevcontaddr = m->phys_page;
b0d623f7
A
3760
3761 VM_PAGE_CHECK(m);
2d21ac55
A
3762 if (m->free) {
3763 free_considered++;
b0d623f7
A
3764 } else {
3765 /*
3766 * This page is not free.
3767 * If we can't steal used pages,
3768 * we have to give up this run
3769 * and keep looking.
3770 * Otherwise, we might need to
3771 * move the contents of this page
3772 * into a substitute page.
3773 */
3774#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3775 if (m->pmapped || m->dirty) {
3776 substitute_needed++;
3777 }
3778#else
3779 RESET_STATE_OF_RUN();
3780#endif
2d21ac55 3781 }
b0d623f7 3782
2d21ac55
A
3783 if ((free_considered + substitute_needed) > free_available) {
3784 /*
3785 * if we let this run continue
3786 * we will end up dropping the vm_page_free_count
3787 * below the reserve limit... we need to abort
3788 * this run, but we can at least re-consider this
3789 * page... thus the jump back to 'retry'
3790 */
3791 RESET_STATE_OF_RUN();
3792
3793 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3794 considered++;
3795 goto retry;
e5568f75 3796 }
2d21ac55
A
3797 /*
3798 * free_available == 0
3799 * so can't consider any free pages... if
3800 * we went to retry in this case, we'd
3801 * get stuck looking at the same page
3802 * w/o making any forward progress...
3803 * we also want to take this path if we've already
3804 * reached our limit that controls the lock latency
3805 */
e5568f75 3806 }
2d21ac55 3807 }
b0d623f7 3808did_consider:
2d21ac55
A
3809 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3810
b0d623f7 3811 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55 3812 vm_page_unlock_queues();
e5568f75 3813
2d21ac55
A
3814 mutex_pause(0);
3815
3816 vm_page_lock_queues();
b0d623f7 3817 lck_mtx_lock(&vm_page_queue_free_lock);
2d21ac55
A
3818
3819 RESET_STATE_OF_RUN();
1c79356b 3820 /*
2d21ac55
A
3821 * reset our free page limit since we
3822 * dropped the lock protecting the vm_page_free_queue
1c79356b 3823 */
2d21ac55
A
3824 free_available = vm_page_free_count - vm_page_free_reserved;
3825 considered = 0;
3826#if MACH_ASSERT
3827 yielded++;
3828#endif
3829 goto retry;
3830 }
3831 considered++;
3832 }
3833 m = VM_PAGE_NULL;
3834
b0d623f7
A
3835 if (npages != contig_pages) {
3836 if (!wrapped) {
3837 /*
3838 * We didn't find a contiguous range but we didn't
3839 * start from the very first page.
3840 * Start again from the very first page.
3841 */
3842 RESET_STATE_OF_RUN();
3843 if( flags & KMA_LOMEM)
3844 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3845 else
3846 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3847 last_idx = 0;
3848 page_idx = last_idx;
3849 wrapped = TRUE;
3850 goto retry;
3851 }
3852 lck_mtx_unlock(&vm_page_queue_free_lock);
3853 } else {
2d21ac55
A
3854 vm_page_t m1;
3855 vm_page_t m2;
3856 unsigned int cur_idx;
3857 unsigned int tmp_start_idx;
3858 vm_object_t locked_object = VM_OBJECT_NULL;
3859 boolean_t abort_run = FALSE;
3860
b0d623f7
A
3861 assert(page_idx - start_idx == contig_pages);
3862
2d21ac55
A
3863 tmp_start_idx = start_idx;
3864
3865 /*
3866 * first pass through to pull the free pages
3867 * off of the free queue so that in case we
3868 * need substitute pages, we won't grab any
3869 * of the free pages in the run... we'll clear
3870 * the 'free' bit in the 2nd pass, and even in
3871 * an abort_run case, we'll collect all of the
3872 * free pages in this run and return them to the free list
3873 */
3874 while (start_idx < page_idx) {
3875
3876 m1 = &vm_pages[start_idx++];
3877
b0d623f7
A
3878#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3879 assert(m1->free);
3880#endif
3881
2d21ac55 3882 if (m1->free) {
d1ecb069
A
3883 if ( m1->phys_page <= vm_lopage_poolend &&
3884 m1->phys_page >= vm_lopage_poolstart) {
2d21ac55 3885
d1ecb069 3886 assert( flags & KMA_LOMEM );
b0d623f7 3887#if MACH_ASSERT
d1ecb069
A
3888 vm_page_verify_free_list(&vm_lopage_queue_free,
3889 (unsigned int) -1, m1, TRUE);
b0d623f7 3890#endif
d1ecb069
A
3891 queue_remove(&vm_lopage_queue_free,
3892 m1,
3893 vm_page_t,
3894 pageq);
3895 vm_lopage_free_count--;
3896
3897#if MACH_ASSERT
3898 vm_page_verify_free_list(&vm_lopage_queue_free,
3899 (unsigned int) -1, VM_PAGE_NULL, FALSE);
3900#endif
3901 } else {
3902
3903 unsigned int color;
3904
3905 color = m1->phys_page & vm_color_mask;
b0d623f7 3906#if MACH_ASSERT
d1ecb069
A
3907 vm_page_verify_free_list(&vm_page_queue_free[color],
3908 color, m1, TRUE);
b0d623f7 3909#endif
d1ecb069
A
3910 queue_remove(&vm_page_queue_free[color],
3911 m1,
3912 vm_page_t,
3913 pageq);
3914 vm_page_free_count--;
3915#if MACH_ASSERT
3916 vm_page_verify_free_list(&vm_page_queue_free[color],
3917 color, VM_PAGE_NULL, FALSE);
3918#endif
3919 }
3920
3921 m1->pageq.next = NULL;
3922 m1->pageq.prev = NULL;
b0d623f7
A
3923 /*
3924 * Clear the "free" bit so that this page
3925 * does not get considered for another
3926 * concurrent physically-contiguous allocation.
3927 */
3928 m1->free = FALSE;
3929 assert(m1->busy);
2d21ac55
A
3930 }
3931 }
3932 /*
3933 * adjust global freelist counts
3934 */
3935 if (vm_page_free_count < vm_page_free_count_minimum)
3936 vm_page_free_count_minimum = vm_page_free_count;
3937
b0d623f7
A
3938 if( flags & KMA_LOMEM)
3939 vm_page_lomem_find_contiguous_last_idx = page_idx;
3940 else
3941 vm_page_find_contiguous_last_idx = page_idx;
3942
2d21ac55
A
3943 /*
3944 * we can drop the free queue lock at this point since
3945 * we've pulled any 'free' candidates off of the list
3946 * we need it dropped so that we can do a vm_page_grab
3947 * when substituting for pmapped/dirty pages
3948 */
b0d623f7 3949 lck_mtx_unlock(&vm_page_queue_free_lock);
2d21ac55
A
3950
3951 start_idx = tmp_start_idx;
3952 cur_idx = page_idx - 1;
3953
3954 while (start_idx++ < page_idx) {
3955 /*
3956 * must go through the list from back to front
3957 * so that the page list is created in the
3958 * correct order - low -> high phys addresses
3959 */
3960 m1 = &vm_pages[cur_idx--];
3961
b0d623f7
A
3962 assert(!m1->free);
3963 if (m1->object == VM_OBJECT_NULL) {
2d21ac55 3964 /*
b0d623f7 3965 * page has already been removed from
2d21ac55
A
3966 * the free list in the 1st pass
3967 */
b0d623f7 3968 assert(m1->offset == (vm_object_offset_t) -1);
2d21ac55
A
3969 assert(m1->busy);
3970 assert(!m1->wanted);
3971 assert(!m1->laundry);
e5568f75 3972 } else {
2d21ac55
A
3973 vm_object_t object;
3974
3975 if (abort_run == TRUE)
3976 continue;
3977
3978 object = m1->object;
3979
3980 if (object != locked_object) {
3981 if (locked_object) {
3982 vm_object_unlock(locked_object);
3983 locked_object = VM_OBJECT_NULL;
3984 }
3985 if (vm_object_lock_try(object))
3986 locked_object = object;
3987 }
3988 if (locked_object == VM_OBJECT_NULL ||
b0d623f7 3989 (VM_PAGE_WIRED(m1) || m1->gobbled ||
2d21ac55
A
3990 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3991 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3992 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3993
3994 if (locked_object) {
3995 vm_object_unlock(locked_object);
3996 locked_object = VM_OBJECT_NULL;
3997 }
3998 tmp_start_idx = cur_idx;
3999 abort_run = TRUE;
4000 continue;
4001 }
4002 if (m1->pmapped || m1->dirty) {
4003 int refmod;
4004 vm_object_offset_t offset;
4005
4006 m2 = vm_page_grab();
4007
4008 if (m2 == VM_PAGE_NULL) {
4009 if (locked_object) {
4010 vm_object_unlock(locked_object);
4011 locked_object = VM_OBJECT_NULL;
4012 }
4013 tmp_start_idx = cur_idx;
4014 abort_run = TRUE;
4015 continue;
4016 }
4017 if (m1->pmapped)
4018 refmod = pmap_disconnect(m1->phys_page);
4019 else
4020 refmod = 0;
4021 vm_page_copy(m1, m2);
4022
4023 m2->reference = m1->reference;
4024 m2->dirty = m1->dirty;
4025
4026 if (refmod & VM_MEM_REFERENCED)
4027 m2->reference = TRUE;
4028 if (refmod & VM_MEM_MODIFIED)
4029 m2->dirty = TRUE;
4030 offset = m1->offset;
4031
4032 /*
4033 * completely cleans up the state
4034 * of the page so that it is ready
4035 * to be put onto the free list, or
4036 * for this purpose it looks like it
4037 * just came off of the free list
4038 */
4039 vm_page_free_prepare(m1);
4040
4041 /*
4042 * make sure we clear the ref/mod state
4043 * from the pmap layer... else we risk
4044 * inheriting state from the last time
4045 * this page was used...
4046 */
4047 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4048 /*
4049 * now put the substitute page on the object
4050 */
b0d623f7 4051 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);
2d21ac55
A
4052
4053 if (m2->reference)
4054 vm_page_activate(m2);
4055 else
4056 vm_page_deactivate(m2);
4057
4058 PAGE_WAKEUP_DONE(m2);
4059
4060 } else {
4061 /*
4062 * completely cleans up the state
4063 * of the page so that it is ready
4064 * to be put onto the free list, or
4065 * for this purpose it looks like it
4066 * just came off of the free list
4067 */
4068 vm_page_free_prepare(m1);
4069 }
4070#if MACH_ASSERT
4071 stolen_pages++;
4072#endif
1c79356b 4073 }
2d21ac55
A
4074 m1->pageq.next = (queue_entry_t) m;
4075 m1->pageq.prev = NULL;
4076 m = m1;
e5568f75 4077 }
2d21ac55
A
4078 if (locked_object) {
4079 vm_object_unlock(locked_object);
4080 locked_object = VM_OBJECT_NULL;
1c79356b
A
4081 }
4082
2d21ac55
A
4083 if (abort_run == TRUE) {
4084 if (m != VM_PAGE_NULL) {
b0d623f7 4085 vm_page_free_list(m, FALSE);
2d21ac55
A
4086 }
4087#if MACH_ASSERT
4088 dumped_run++;
4089#endif
4090 /*
4091 * want the index of the last
4092 * page in this run that was
4093 * successfully 'stolen', so back
4094 * it up 1 for the auto-decrement on use
4095 * and 1 more to bump back over this page
4096 */
4097 page_idx = tmp_start_idx + 2;
b0d623f7
A
4098 if (page_idx >= vm_pages_count) {
4099 if (wrapped)
4100 goto done_scanning;
4101 page_idx = last_idx = 0;
4102 wrapped = TRUE;
4103 }
4104 abort_run = FALSE;
4105
2d21ac55 4106 /*
b0d623f7
A
4107 * We couldn't complete this run, so reset our state
4108 * and resume the scan just past the page that made
4109 * us abort (wrapping to index 0 above if needed).
2d21ac55 4110 */
b0d623f7
A
4111 RESET_STATE_OF_RUN();
4112
4113 if( flags & KMA_LOMEM)
4114 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4115 else
4116 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4117
4118 last_idx = page_idx;
2d21ac55 4119
b0d623f7
A
4120 lck_mtx_lock(&vm_page_queue_free_lock);
4121 /*
4122 * reset our free page limit since we
4123 * dropped the lock protecting the vm_page_free_queue
4124 */
4125 free_available = vm_page_free_count - vm_page_free_reserved;
2d21ac55
A
4126 goto retry;
4127 }
e5568f75 4128
e5568f75 4129 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
2d21ac55
A
4130
4131 if (wire == TRUE)
4132 m1->wire_count++;
4133 else
4134 m1->gobbled = TRUE;
e5568f75 4135 }
2d21ac55
A
4136 if (wire == FALSE)
4137 vm_page_gobble_count += npages;
4138
4139 /*
4140 * gobbled pages are also counted as wired pages
4141 */
e5568f75 4142 vm_page_wire_count += npages;
e5568f75 4143
2d21ac55
A
4144 assert(vm_page_verify_contiguous(m, npages));
4145 }
4146done_scanning:
4147 vm_page_unlock_queues();
4148
593a1d5f 4149#if DEBUG
2d21ac55
A
4150 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4151
4152 tv_end_sec -= tv_start_sec;
4153 if (tv_end_usec < tv_start_usec) {
4154 tv_end_sec--;
4155 tv_end_usec += 1000000;
1c79356b 4156 }
2d21ac55
A
4157 tv_end_usec -= tv_start_usec;
4158 if (tv_end_usec >= 1000000) {
4159 tv_end_sec++;
4160 tv_end_usec -= 1000000;
4161 }
b0d623f7
A
4162 if (vm_page_find_contig_debug) {
4163 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4164 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4165 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4166 scanned, yielded, dumped_run, stolen_pages);
4167 }
e5568f75 4168
593a1d5f
A
4169#endif
4170#if MACH_ASSERT
2d21ac55
A
4171 vm_page_verify_free_lists();
4172#endif
e5568f75 4173 return m;
1c79356b
A
4174}
4175
4176/*
4177 * Allocate a list of contiguous, wired pages.
4178 */
4179kern_return_t
4180cpm_allocate(
4181 vm_size_t size,
4182 vm_page_t *list,
2d21ac55 4183 ppnum_t max_pnum,
b0d623f7
A
4184 ppnum_t pnum_mask,
4185 boolean_t wire,
4186 int flags)
1c79356b 4187{
91447636
A
4188 vm_page_t pages;
4189 unsigned int npages;
1c79356b
A
4190
4191 if (size % page_size != 0)
4192 return KERN_INVALID_ARGUMENT;
4193
b0d623f7
A
4194 npages = (unsigned int) (size / PAGE_SIZE);
4195 if (npages != size / PAGE_SIZE) {
4196 /* 32-bit overflow */
4197 return KERN_INVALID_ARGUMENT;
4198 }
1c79356b 4199
1c79356b
A
4200 /*
4201 * Obtain a pointer to a subset of the free
4202 * list large enough to satisfy the request;
4203 * the region will be physically contiguous.
4204 */
b0d623f7 4205 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
e5568f75 4206
2d21ac55 4207 if (pages == VM_PAGE_NULL)
1c79356b 4208 return KERN_NO_SPACE;
1c79356b 4209 /*
2d21ac55 4210 * determine need for wakeups
1c79356b 4211 */
2d21ac55
A
4212 if ((vm_page_free_count < vm_page_free_min) ||
4213 ((vm_page_free_count < vm_page_free_target) &&
4214 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
e5568f75 4215 thread_wakeup((event_t) &vm_page_free_wanted);
2d21ac55
A
4216
4217#if CONFIG_EMBEDDED
4218 {
4219 int percent_avail;
e5568f75 4220
2d21ac55
A
4221 /*
4222 * Decide if we need to poke the memorystatus notification thread.
4223 */
4224 percent_avail =
4225 (vm_page_active_count + vm_page_inactive_count +
4226 vm_page_speculative_count + vm_page_free_count +
cf7d32b8 4227 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2d21ac55
A
4228 atop_64(max_mem);
4229 if (percent_avail <= (kern_memorystatus_level - 5)) {
4230 kern_memorystatus_level = percent_avail;
4231 thread_wakeup((event_t)&kern_memorystatus_wakeup);
4232 }
4233 }
4234#endif
1c79356b
A
4235 /*
4236 * The CPM pages should now be available and
4237 * ordered by ascending physical address.
4238 */
4239 assert(vm_page_verify_contiguous(pages, npages));
4240
4241 *list = pages;
4242 return KERN_SUCCESS;
4243}
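/*
 * Illustration only: a hedged sketch of how a kernel client might call
 * cpm_allocate() for a physically contiguous, wired run of pages.  The
 * wrapper name is hypothetical, and the caller remains responsible for
 * mapping and eventually releasing the pages; passing 0 for max_pnum and
 * pnum_mask means no physical-address ceiling and no alignment constraint.
 */
#if 0
static kern_return_t
example_grab_contiguous(vm_size_t size, vm_page_t *pages_out)
{
	if (size == 0 || (size % PAGE_SIZE) != 0)
		return KERN_INVALID_ARGUMENT;

	return cpm_allocate(size,
			    pages_out,
			    0,		/* max_pnum: no upper bound on phys page */
			    0,		/* pnum_mask: no alignment requirement */
			    TRUE,	/* wire the pages */
			    0);		/* flags: not KMA_LOMEM */
}
#endif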
2d21ac55 4244
b0d623f7
A
4245/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4246
d1ecb069
A
4247#if HIBERNATION
4248
b0d623f7
A
4249static vm_page_t hibernate_gobble_queue;
4250
4251static void
4252hibernate_page_list_zero(hibernate_page_list_t *list)
4253{
4254 uint32_t bank;
4255 hibernate_bitmap_t * bitmap;
4256
4257 bitmap = &list->bank_bitmap[0];
4258 for (bank = 0; bank < list->bank_count; bank++)
4259 {
4260 uint32_t last_bit;
4261
4262 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4263 // set out-of-bound bits at end of bitmap.
4264 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4265 if (last_bit)
4266 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4267
4268 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4269 }
4270}
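/*
 * Illustration only (not the real hibernate_page_bitset()/hibernate_page_bittst()):
 * the bank bitmaps zeroed above use one bit per page, most-significant bit
 * first within each 32-bit word, which is why the out-of-bound tail bits of
 * the last word are set with (0xFFFFFFFF >> last_bit).  A simplified,
 * hypothetical single-bank setter/tester consistent with that layout:
 */
#if 0
static void
example_bank_bitset(uint32_t *bitmap, uint32_t page_in_bank)
{
	bitmap[page_in_bank >> 5] |= (0x80000000U >> (page_in_bank & 31));
}

static boolean_t
example_bank_bittst(const uint32_t *bitmap, uint32_t page_in_bank)
{
	return (bitmap[page_in_bank >> 5] & (0x80000000U >> (page_in_bank & 31)))
		? TRUE : FALSE;
}
#endif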
4271
4272void
4273hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4274{
4275 uint32_t i;
4276 vm_page_t m;
4277 uint64_t start, end, timeout, nsec;
4278 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4279 clock_get_uptime(&start);
4280
4281 for (i = 0; i < gobble_count; i++)
4282 {
4283 while (VM_PAGE_NULL == (m = vm_page_grab()))
4284 {
4285 clock_get_uptime(&end);
4286 if (end >= timeout)
4287 break;
4288 VM_PAGE_WAIT();
4289 }
4290 if (!m)
4291 break;
4292 m->busy = FALSE;
4293 vm_page_gobble(m);
4294
4295 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4296 hibernate_gobble_queue = m;
4297 }
4298
4299 clock_get_uptime(&end);
4300 absolutetime_to_nanoseconds(end - start, &nsec);
4301 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4302}
4303
4304void
4305hibernate_free_gobble_pages(void)
4306{
4307 vm_page_t m, next;
4308 uint32_t count = 0;
4309
4310 m = (vm_page_t) hibernate_gobble_queue;
4311 while(m)
4312 {
4313 next = (vm_page_t) m->pageq.next;
4314 vm_page_free(m);
4315 count++;
4316 m = next;
4317 }
4318 hibernate_gobble_queue = VM_PAGE_NULL;
4319
4320 if (count)
4321 HIBLOG("Freed %d pages\n", count);
4322}
4323
4324static boolean_t
4325hibernate_consider_discard(vm_page_t m)
4326{
4327 vm_object_t object = NULL;
4328 int refmod_state;
4329 boolean_t discard = FALSE;
4330
4331 do
4332 {
4333 if(m->private)
4334 panic("hibernate_consider_discard: private");
4335
4336 if (!vm_object_lock_try(m->object))
4337 break;
4338
4339 object = m->object;
4340
4341 if (VM_PAGE_WIRED(m))
4342 break;
4343 if (m->precious)
4344 break;
4345
4346 if (m->busy || !object->alive)
4347 /*
4348 * Somebody is playing with this page.
4349 */
4350 break;
4351
4352 if (m->absent || m->unusual || m->error)
4353 /*
4354 * If it's unusual in any way, ignore it
4355 */
4356 break;
4357
4358 if (m->cleaning)
4359 break;
4360
4361 if (m->laundry || m->list_req_pending)
4362 break;
4363
4364 if (!m->dirty)
4365 {
4366 refmod_state = pmap_get_refmod(m->phys_page);
4367
4368 if (refmod_state & VM_MEM_REFERENCED)
4369 m->reference = TRUE;
4370 if (refmod_state & VM_MEM_MODIFIED)
4371 m->dirty = TRUE;
4372 }
4373
4374 /*
4375 * If it's clean or purgeable we can discard the page on wakeup.
4376 */
4377 discard = (!m->dirty)
4378 || (VM_PURGABLE_VOLATILE == object->purgable)
4379 || (VM_PURGABLE_EMPTY == m->object->purgable);
4380 }
4381 while (FALSE);
4382
4383 if (object)
4384 vm_object_unlock(object);
4385
4386 return (discard);
4387}
4388
4389
4390static void
4391hibernate_discard_page(vm_page_t m)
4392{
4393 if (m->absent || m->unusual || m->error)
4394 /*
4395 * If it's unusual in any way, ignore it
4396 */
4397 return;
4398
4399 if (m->pmapped == TRUE)
4400 {
4401 __unused int refmod_state = pmap_disconnect(m->phys_page);
4402 }
4403
4404 if (m->laundry)
4405 panic("hibernate_discard_page(%p) laundry", m);
4406 if (m->private)
4407 panic("hibernate_discard_page(%p) private", m);
4408 if (m->fictitious)
4409 panic("hibernate_discard_page(%p) fictitious", m);
4410
4411 if (VM_PURGABLE_VOLATILE == m->object->purgable)
4412 {
4413 /* object should be on a queue */
4414 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
4415 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
4416 assert(old_queue);
4417 /* No need to lock page queue for token delete, hibernate_vm_unlock()
4418 makes sure these locks are uncontended before sleep */
4419 vm_purgeable_token_delete_first(old_queue);
4420 m->object->purgable = VM_PURGABLE_EMPTY;
4421 }
4422
4423 vm_page_free(m);
4424}
4425
4426/*
4427 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
4428 pages known to VM to not need saving are subtracted.
4429 Wired pages to be saved are present in page_list_wired, pageable in page_list.
4430*/
4431
4432void
4433hibernate_page_list_setall(hibernate_page_list_t * page_list,
4434 hibernate_page_list_t * page_list_wired,
4435 uint32_t * pagesOut)
4436{
4437 uint64_t start, end, nsec;
4438 vm_page_t m;
4439 uint32_t pages = page_list->page_count;
4440 uint32_t count_zf = 0, count_throttled = 0;
4441 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
4442 uint32_t count_wire = pages;
4443 uint32_t count_discard_active = 0;
4444 uint32_t count_discard_inactive = 0;
4445 uint32_t count_discard_purgeable = 0;
4446 uint32_t count_discard_speculative = 0;
4447 uint32_t i;
4448 uint32_t bank;
4449 hibernate_bitmap_t * bitmap;
4450 hibernate_bitmap_t * bitmap_wired;
4451
4452
4453 HIBLOG("hibernate_page_list_setall start\n");
4454
4455 clock_get_uptime(&start);
4456
4457 hibernate_page_list_zero(page_list);
4458 hibernate_page_list_zero(page_list_wired);
4459
4460 if (vm_page_local_q) {
4461 for (i = 0; i < vm_page_local_q_count; i++)
4462 vm_page_reactivate_local(i, TRUE, TRUE);
4463 }
4464
4465 m = (vm_page_t) hibernate_gobble_queue;
4466 while(m)
4467 {
4468 pages--;
4469 count_wire--;
4470 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4471 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4472 m = (vm_page_t) m->pageq.next;
4473 }
4474
4475 for( i = 0; i < vm_colors; i++ )
4476 {
4477 queue_iterate(&vm_page_queue_free[i],
4478 m,
4479 vm_page_t,
4480 pageq)
4481 {
4482 pages--;
4483 count_wire--;
4484 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4485 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4486 }
4487 }
4488
4489 queue_iterate(&vm_lopage_queue_free,
4490 m,
4491 vm_page_t,
4492 pageq)
4493 {
4494 pages--;
4495 count_wire--;
4496 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4497 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4498 }
4499
4500 queue_iterate( &vm_page_queue_throttled,
4501 m,
4502 vm_page_t,
4503 pageq )
4504 {
4505 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
4506 && hibernate_consider_discard(m))
4507 {
4508 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4509 count_discard_inactive++;
4510 }
4511 else
4512 count_throttled++;
4513 count_wire--;
4514 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4515 }
4516
4517 queue_iterate( &vm_page_queue_zf,
4518 m,
4519 vm_page_t,
4520 pageq )
4521 {
4522 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
4523 && hibernate_consider_discard(m))
4524 {
4525 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4526 if (m->dirty)
4527 count_discard_purgeable++;
4528 else
4529 count_discard_inactive++;
4530 }
4531 else
4532 count_zf++;
4533 count_wire--;
4534 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4535 }
4536
4537 queue_iterate( &vm_page_queue_inactive,
4538 m,
4539 vm_page_t,
4540 pageq )
4541 {
4542 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
4543 && hibernate_consider_discard(m))
4544 {
4545 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4546 if (m->dirty)
4547 count_discard_purgeable++;
4548 else
4549 count_discard_inactive++;
4550 }
4551 else
4552 count_inactive++;
4553 count_wire--;
4554 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4555 }
4556
4557 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
4558 {
4559 queue_iterate(&vm_page_queue_speculative[i].age_q,
4560 m,
4561 vm_page_t,
4562 pageq)
4563 {
4564 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
4565 && hibernate_consider_discard(m))
4566 {
4567 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4568 count_discard_speculative++;
4569 }
4570 else
4571 count_speculative++;
4572 count_wire--;
4573 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4574 }
4575 }
4576
4577 queue_iterate( &vm_page_queue_active,
4578 m,
4579 vm_page_t,
4580 pageq )
4581 {
4582 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
4583 && hibernate_consider_discard(m))
4584 {
4585 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4586 if (m->dirty)
4587 count_discard_purgeable++;
4588 else
4589 count_discard_active++;
4590 }
4591 else
4592 count_active++;
4593 count_wire--;
4594 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4595 }
4596
4597 // pull wired from hibernate_bitmap: pages to be saved in the wired image (bit clear in page_list_wired) get their bit set in page_list so they are not saved again as pageable
4598
4599 bitmap = &page_list->bank_bitmap[0];
4600 bitmap_wired = &page_list_wired->bank_bitmap[0];
4601 for (bank = 0; bank < page_list->bank_count; bank++)
4602 {
4603 for (i = 0; i < bitmap->bitmapwords; i++)
4604 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
4605 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
4606 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
4607 }
4608
4609 // machine dependent adjustments
4610 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
4611
4612 clock_get_uptime(&end);
4613 absolutetime_to_nanoseconds(end - start, &nsec);
4614 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
4615
4616 HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
4617 pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
4618 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
4619
4620 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
4621}
4622
4623void
4624hibernate_page_list_discard(hibernate_page_list_t * page_list)
4625{
4626 uint64_t start, end, nsec;
4627 vm_page_t m;
4628 vm_page_t next;
4629 uint32_t i;
4630 uint32_t count_discard_active = 0;
4631 uint32_t count_discard_inactive = 0;
4632 uint32_t count_discard_purgeable = 0;
4633 uint32_t count_discard_speculative = 0;
4634
4635 clock_get_uptime(&start);
4636
4637 m = (vm_page_t) queue_first(&vm_page_queue_zf);
4638 while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
4639 {
4640 next = (vm_page_t) m->pageq.next;
4641 if (hibernate_page_bittst(page_list, m->phys_page))
4642 {
4643 if (m->dirty)
4644 count_discard_purgeable++;
4645 else
4646 count_discard_inactive++;
4647 hibernate_discard_page(m);
4648 }
4649 m = next;
4650 }
4651
4652 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
4653 {
4654 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
4655 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
4656 {
4657 next = (vm_page_t) m->pageq.next;
4658 if (hibernate_page_bittst(page_list, m->phys_page))
4659 {
4660 count_discard_speculative++;
4661 hibernate_discard_page(m);
4662 }
4663 m = next;
4664 }
4665 }
4666
4667 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
4668 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
4669 {
4670 next = (vm_page_t) m->pageq.next;
4671 if (hibernate_page_bittst(page_list, m->phys_page))
4672 {
4673 if (m->dirty)
4674 count_discard_purgeable++;
4675 else
4676 count_discard_inactive++;
4677 hibernate_discard_page(m);
4678 }
4679 m = next;
4680 }
4681
4682 m = (vm_page_t) queue_first(&vm_page_queue_active);
4683 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
4684 {
4685 next = (vm_page_t) m->pageq.next;
4686 if (hibernate_page_bittst(page_list, m->phys_page))
4687 {
4688 if (m->dirty)
4689 count_discard_purgeable++;
4690 else
4691 count_discard_active++;
4692 hibernate_discard_page(m);
4693 }
4694 m = next;
4695 }
4696
4697 clock_get_uptime(&end);
4698 absolutetime_to_nanoseconds(end - start, &nsec);
4699 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
4700 nsec / 1000000ULL,
4701 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
4702}
4703
d1ecb069
A
4704#endif /* HIBERNATION */
4705
b0d623f7 4706/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
1c79356b
A
4707
4708#include <mach_vm_debug.h>
4709#if MACH_VM_DEBUG
4710
4711#include <mach_debug/hash_info.h>
4712#include <vm/vm_debug.h>
4713
4714/*
4715 * Routine: vm_page_info
4716 * Purpose:
4717 * Return information about the global VP table.
4718 * Fills the buffer with as much information as possible
4719 * and returns the desired size of the buffer.
4720 * Conditions:
4721 * Nothing locked. The caller should provide
4722 * possibly-pageable memory.
4723 */
4724
4725unsigned int
4726vm_page_info(
4727 hash_info_bucket_t *info,
4728 unsigned int count)
4729{
91447636 4730 unsigned int i;
b0d623f7 4731 lck_spin_t *bucket_lock;
1c79356b
A
4732
4733 if (vm_page_bucket_count < count)
4734 count = vm_page_bucket_count;
4735
4736 for (i = 0; i < count; i++) {
4737 vm_page_bucket_t *bucket = &vm_page_buckets[i];
4738 unsigned int bucket_count = 0;
4739 vm_page_t m;
4740
b0d623f7
A
4741 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
4742 lck_spin_lock(bucket_lock);
4743
1c79356b
A
4744 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
4745 bucket_count++;
b0d623f7
A
4746
4747 lck_spin_unlock(bucket_lock);
1c79356b
A
4748
4749 /* don't touch pageable memory while holding locks */
4750 info[i].hib_count = bucket_count;
4751 }
4752
4753 return vm_page_bucket_count;
4754}
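/*
 * Illustration only: a hedged sketch of how a debug-only caller might use
 * vm_page_info() to dump hash-bucket occupancy.  The buffer size and the
 * caller name are hypothetical; a real caller would size the buffer from
 * the returned count and typically hand the data to user space via MIG.
 */
#if 0
static void
example_dump_bucket_counts(void)
{
	hash_info_bucket_t	info[128];
	unsigned int		i, n;

	n = vm_page_info(info, 128);	/* returns vm_page_bucket_count */
	if (n > 128)
		n = 128;		/* only the first 128 entries were filled */
	for (i = 0; i < n; i++)
		printf("bucket %u: %u pages\n", i, info[i].hib_count);
}
#endif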
4755#endif /* MACH_VM_DEBUG */
4756
4757#include <mach_kdb.h>
4758#if MACH_KDB
4759
4760#include <ddb/db_output.h>
4761#include <vm/vm_print.h>
4762#define printf kdbprintf
4763
4764/*
4765 * Routine: vm_page_print [exported]
4766 */
4767void
4768vm_page_print(
91447636 4769 db_addr_t db_addr)
1c79356b 4770{
91447636
A
4771 vm_page_t p;
4772
4773 p = (vm_page_t) (long) db_addr;
1c79356b
A
4774
4775 iprintf("page 0x%x\n", p);
4776
4777 db_indent += 2;
4778
4779 iprintf("object=0x%x", p->object);
4780 printf(", offset=0x%x", p->offset);
4781 printf(", wire_count=%d", p->wire_count);
1c79356b 4782
b0d623f7
A
4783 iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
4784 (p->local ? "" : "!"),
1c79356b
A
4785 (p->inactive ? "" : "!"),
4786 (p->active ? "" : "!"),
2d21ac55 4787 (p->throttled ? "" : "!"),
1c79356b
A
4788 (p->gobbled ? "" : "!"),
4789 (p->laundry ? "" : "!"),
4790 (p->free ? "" : "!"),
4791 (p->reference ? "" : "!"),
91447636 4792 (p->encrypted ? "" : "!"));
1c79356b
A
4793 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
4794 (p->busy ? "" : "!"),
4795 (p->wanted ? "" : "!"),
4796 (p->tabled ? "" : "!"),
4797 (p->fictitious ? "" : "!"),
4798 (p->private ? "" : "!"),
4799 (p->precious ? "" : "!"));
4800 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
4801 (p->absent ? "" : "!"),
4802 (p->error ? "" : "!"),
4803 (p->dirty ? "" : "!"),
4804 (p->cleaning ? "" : "!"),
4805 (p->pageout ? "" : "!"),
4806 (p->clustered ? "" : "!"));
2d21ac55 4807 iprintf("%soverwriting, %srestart, %sunusual\n",
1c79356b
A
4808 (p->overwriting ? "" : "!"),
4809 (p->restart ? "" : "!"),
0b4e3aa0 4810 (p->unusual ? "" : "!"));
1c79356b 4811
55e303ae 4812 iprintf("phys_page=0x%x", p->phys_page);
1c79356b
A
4813
4814 db_indent -= 2;
4815}
4816#endif /* MACH_KDB */