git.saurik.com - apple/xnu.git/blame - osfmk/vm/vm_resident.c (xnu-1228.9.59)
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58/*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
91447636 65#include <debug.h>
2d21ac55 66#include <libkern/OSAtomic.h>
91447636 67
9bccf70c 68#include <mach/clock_types.h>
1c79356b
A
69#include <mach/vm_prot.h>
70#include <mach/vm_statistics.h>
2d21ac55 71#include <mach/sdt.h>
1c79356b
A
72#include <kern/counters.h>
73#include <kern/sched_prim.h>
74#include <kern/task.h>
75#include <kern/thread.h>
76#include <kern/zalloc.h>
77#include <kern/xpr.h>
78#include <vm/pmap.h>
79#include <vm/vm_init.h>
80#include <vm/vm_map.h>
81#include <vm/vm_page.h>
82#include <vm/vm_pageout.h>
83#include <vm/vm_kern.h> /* kernel_memory_allocate() */
84#include <kern/misc_protos.h>
85#include <zone_debug.h>
86#include <vm/cpm.h>
55e303ae
A
87#include <ppc/mappings.h> /* (BRINGUP) */
88#include <pexpert/pexpert.h> /* (BRINGUP) */
89
91447636 90#include <vm/vm_protos.h>
2d21ac55
A
91#include <vm/memory_object.h>
92#include <vm/vm_purgeable_internal.h>
93
94#if CONFIG_EMBEDDED
95#include <sys/kern_memorystatus.h>
96#endif
97
98int speculative_age_index = 0;
99int speculative_steal_index = 0;
100
101struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
102
0b4e3aa0 103
1c79356b
A
104/*
105 * Associated with page of user-allocatable memory is a
106 * page structure.
107 */
108
109/*
110 * These variables record the values returned by vm_page_bootstrap,
111 * for debugging purposes. The implementation of pmap_steal_memory
112 * and pmap_startup here also uses them internally.
113 */
114
115vm_offset_t virtual_space_start;
116vm_offset_t virtual_space_end;
117int vm_page_pages;
118
119/*
120 * The vm_page_lookup() routine, which provides for fast
121 * (virtual memory object, offset) to page lookup, employs
122 * the following hash table. The vm_page_{insert,remove}
123 * routines install and remove associations in the table.
124 * [This table is often called the virtual-to-physical,
125 * or VP, table.]
126 */
127typedef struct {
128 vm_page_t pages;
129#if MACH_PAGE_HASH_STATS
130 int cur_count; /* current count */
131 int hi_count; /* high water mark */
132#endif /* MACH_PAGE_HASH_STATS */
133} vm_page_bucket_t;
134
135vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
136unsigned int vm_page_bucket_count = 0; /* How big is array? */
137unsigned int vm_page_hash_mask; /* Mask for hash function */
138unsigned int vm_page_hash_shift; /* Shift for hash function */
2d21ac55 139uint32_t vm_page_bucket_hash; /* Basic bucket hash */
1c79356b
A
140decl_simple_lock_data(,vm_page_bucket_lock)
141
91447636 142
1c79356b
A
143#if MACH_PAGE_HASH_STATS
144/* This routine is only for debug. It is intended to be called by
145 * hand by a developer using a kernel debugger. This routine prints
146 * out vm_page_hash table statistics to the kernel debug console.
147 */
148void
149hash_debug(void)
150{
151 int i;
152 int numbuckets = 0;
153 int highsum = 0;
154 int maxdepth = 0;
155
156 for (i = 0; i < vm_page_bucket_count; i++) {
157 if (vm_page_buckets[i].hi_count) {
158 numbuckets++;
159 highsum += vm_page_buckets[i].hi_count;
160 if (vm_page_buckets[i].hi_count > maxdepth)
161 maxdepth = vm_page_buckets[i].hi_count;
162 }
163 }
164 printf("Total number of buckets: %d\n", vm_page_bucket_count);
165 printf("Number used buckets: %d = %d%%\n",
166 numbuckets, 100*numbuckets/vm_page_bucket_count);
167 printf("Number unused buckets: %d = %d%%\n",
168 vm_page_bucket_count - numbuckets,
169 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
170 printf("Sum of bucket max depth: %d\n", highsum);
171 printf("Average bucket depth: %d.%2d\n",
172 highsum/vm_page_bucket_count,
173 highsum%vm_page_bucket_count);
174 printf("Maximum bucket depth: %d\n", maxdepth);
175}
176#endif /* MACH_PAGE_HASH_STATS */
177
178/*
179 * The virtual page size is currently implemented as a runtime
180 * variable, but is constant once initialized using vm_set_page_size.
181 * This initialization must be done in the machine-dependent
182 * bootstrap sequence, before calling other machine-independent
183 * initializations.
184 *
185 * All references to the virtual page size outside this
186 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
187 * constants.
188 */
55e303ae
A
189vm_size_t page_size = PAGE_SIZE;
190vm_size_t page_mask = PAGE_MASK;
2d21ac55 191int page_shift = PAGE_SHIFT;
1c79356b
A
192
193/*
194 * Resident page structures are initialized from
195 * a template (see vm_page_alloc).
196 *
197 * When adding a new field to the virtual memory
198 * object structure, be sure to add initialization
199 * (see vm_page_bootstrap).
200 */
201struct vm_page vm_page_template;
202
2d21ac55
A
203vm_page_t vm_pages = VM_PAGE_NULL;
204unsigned int vm_pages_count = 0;
205
1c79356b
A
206/*
207 * Resident pages that represent real memory
2d21ac55
A
208 * are allocated from a set of free lists,
209 * one per color.
1c79356b 210 */
2d21ac55
A
211unsigned int vm_colors;
212unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
213unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
214queue_head_t vm_page_queue_free[MAX_COLORS];
1c79356b 215vm_page_t vm_page_queue_fictitious;
1c79356b 216unsigned int vm_page_free_wanted;
2d21ac55 217unsigned int vm_page_free_wanted_privileged;
91447636
A
218unsigned int vm_page_free_count;
219unsigned int vm_page_fictitious_count;
1c79356b
A
220
221unsigned int vm_page_free_count_minimum; /* debugging */
222
223/*
224 * Occasionally, the virtual memory system uses
225 * resident page structures that do not refer to
226 * real pages, for example to leave a page with
227 * important state information in the VP table.
228 *
229 * These page structures are allocated the way
230 * most other kernel structures are.
231 */
232zone_t vm_page_zone;
233decl_mutex_data(,vm_page_alloc_lock)
9bccf70c 234unsigned int io_throttle_zero_fill;
1c79356b
A
235
236/*
237 * Fictitious pages don't have a physical address,
55e303ae 238 * but we must initialize phys_page to something.
1c79356b
A
239 * For debugging, this should be a strange value
240 * that the pmap module can recognize in assertions.
241 */
242vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
243
2d21ac55
A
244/*
245 * Guard pages are not accessible so they don't
246 * need a physical address, but we need to enter
247 * one in the pmap.
248 * Let's make it recognizable and make sure that
249 * we don't use a real physical page with that
250 * physical address.
251 */
252vm_offset_t vm_page_guard_addr = (vm_offset_t) -2;
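/*
 * Minimal sketch (not part of the build) of how these sentinel values
 * are recognized; the helper name is hypothetical, but the test matches
 * the assertions in vm_page_release_fictitious() below.
 */
#if 0
static boolean_t
vm_page_is_placeholder(vm_page_t m)
{
	/* fictitious and guard pages never carry a real physical address */
	return (m->fictitious &&
		(m->phys_page == vm_page_fictitious_addr ||
		 m->phys_page == vm_page_guard_addr));
}
#endif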
253
1c79356b
A
254/*
255 * Resident page structures are also chained on
256 * queues that are used by the page replacement
257 * system (pageout daemon). These queues are
258 * defined here, but are shared by the pageout
9bccf70c
A
259 * module. The inactive queue is broken into
260 * inactive and zf for convenience as the
 261 * pageout daemon often assigns a higher
262 * affinity to zf pages
1c79356b
A
263 */
264queue_head_t vm_page_queue_active;
265queue_head_t vm_page_queue_inactive;
2d21ac55
A
266queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
267
91447636
A
268unsigned int vm_page_active_count;
269unsigned int vm_page_inactive_count;
2d21ac55
A
270unsigned int vm_page_throttled_count;
271unsigned int vm_page_speculative_count;
91447636
A
272unsigned int vm_page_wire_count;
273unsigned int vm_page_gobble_count = 0;
274unsigned int vm_page_wire_count_warning = 0;
275unsigned int vm_page_gobble_count_warning = 0;
276
277unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
278uint64_t vm_page_purged_count = 0; /* total count of purged pages */
1c79356b 279
2d21ac55
A
280unsigned int vm_page_speculative_recreated = 0;
281unsigned int vm_page_speculative_created = 0;
282unsigned int vm_page_speculative_used = 0;
283
0c530ab8
A
284ppnum_t vm_lopage_poolstart = 0;
285ppnum_t vm_lopage_poolend = 0;
286int vm_lopage_poolsize = 0;
287uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
288
289
1c79356b
A
290/*
291 * Several page replacement parameters are also
292 * shared with this module, so that page allocation
293 * (done here in vm_page_alloc) can trigger the
294 * pageout daemon.
295 */
91447636
A
296unsigned int vm_page_free_target = 0;
297unsigned int vm_page_free_min = 0;
298unsigned int vm_page_inactive_target = 0;
2d21ac55 299unsigned int vm_page_inactive_min = 0;
91447636 300unsigned int vm_page_free_reserved = 0;
2d21ac55 301unsigned int vm_page_zfill_throttle_count = 0;
1c79356b
A
302
303/*
304 * The VM system has a couple of heuristics for deciding
305 * that pages are "uninteresting" and should be placed
306 * on the inactive queue as likely candidates for replacement.
307 * These variables let the heuristics be controlled at run-time
308 * to make experimentation easier.
309 */
310
311boolean_t vm_page_deactivate_hint = TRUE;
312
313/*
314 * vm_set_page_size:
315 *
316 * Sets the page size, perhaps based upon the memory
317 * size. Must be called before any use of page-size
318 * dependent functions.
319 *
320 * Sets page_shift and page_mask from page_size.
321 */
322void
323vm_set_page_size(void)
324{
1c79356b
A
325 page_mask = page_size - 1;
326
327 if ((page_mask & page_size) != 0)
328 panic("vm_set_page_size: page size not a power of two");
329
330 for (page_shift = 0; ; page_shift++)
91447636 331 if ((1U << page_shift) == page_size)
1c79356b 332 break;
1c79356b
A
333}
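/*
 * Illustrative sketch (hypothetical helper, not part of the build) of
 * the address arithmetic page_mask and page_shift support; outside this
 * module the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT constants are used
 * instead, as noted above.
 */
#if 0
static void
page_round_example(vm_offset_t addr, vm_size_t size)
{
	vm_offset_t first  = addr & ~page_mask;			     /* round start down to a page boundary */
	vm_offset_t last   = (addr + size + page_mask) & ~page_mask; /* round end up to a page boundary */
	vm_size_t   npages = (last - first) >> page_shift;	     /* whole pages covering [addr, addr+size) */

	(void) npages;
}
#endif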
334
2d21ac55
A
335
 336 /* Called once during startup, once the cache geometry is known.
337 */
338static void
339vm_page_set_colors( void )
340{
341 unsigned int n, override;
342
593a1d5f 343 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
2d21ac55
A
344 n = override;
345 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
346 n = vm_cache_geometry_colors;
347 else n = DEFAULT_COLORS; /* use default if all else fails */
348
349 if ( n == 0 )
350 n = 1;
351 if ( n > MAX_COLORS )
352 n = MAX_COLORS;
353
354 /* the count must be a power of 2 */
355 if ( ( n & (n - 1)) !=0 )
356 panic("vm_page_set_colors");
357
358 vm_colors = n;
359 vm_color_mask = n - 1;
360}
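/*
 * Because the color count is forced to a power of two above, a page's
 * free list is chosen by simply masking its physical page number;
 * vm_page_release() below does exactly this.  Sketch with a
 * hypothetical helper name:
 */
#if 0
static unsigned int
vm_page_color_of(vm_page_t m)
{
	return (m->phys_page & vm_color_mask);	/* index into vm_page_queue_free[] */
}
#endif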
361
362
1c79356b
A
363/*
364 * vm_page_bootstrap:
365 *
366 * Initializes the resident memory module.
367 *
368 * Allocates memory for the page cells, and
369 * for the object/offset-to-page hash table headers.
370 * Each page cell is initialized and placed on the free list.
371 * Returns the range of available kernel virtual memory.
372 */
373
374void
375vm_page_bootstrap(
376 vm_offset_t *startp,
377 vm_offset_t *endp)
378{
379 register vm_page_t m;
91447636 380 unsigned int i;
1c79356b
A
381 unsigned int log1;
382 unsigned int log2;
383 unsigned int size;
384
385 /*
386 * Initialize the vm_page template.
387 */
388
389 m = &vm_page_template;
91447636
A
390 m->object = VM_OBJECT_NULL; /* reset later */
391 m->offset = (vm_object_offset_t) -1; /* reset later */
1c79356b
A
392 m->wire_count = 0;
393
91447636
A
394 m->pageq.next = NULL;
395 m->pageq.prev = NULL;
396 m->listq.next = NULL;
397 m->listq.prev = NULL;
398
2d21ac55
A
399 m->speculative = FALSE;
400 m->throttled = FALSE;
1c79356b
A
401 m->inactive = FALSE;
402 m->active = FALSE;
2d21ac55 403 m->no_cache = FALSE;
1c79356b
A
404 m->laundry = FALSE;
405 m->free = FALSE;
2d21ac55 406 m->pmapped = FALSE;
4a3eedf9 407 m->wpmapped = FALSE;
1c79356b
A
408 m->reference = FALSE;
409 m->pageout = FALSE;
0b4e3aa0 410 m->dump_cleaning = FALSE;
1c79356b
A
411 m->list_req_pending = FALSE;
412
413 m->busy = TRUE;
414 m->wanted = FALSE;
415 m->tabled = FALSE;
416 m->fictitious = FALSE;
417 m->private = FALSE;
418 m->absent = FALSE;
419 m->error = FALSE;
420 m->dirty = FALSE;
421 m->cleaning = FALSE;
422 m->precious = FALSE;
423 m->clustered = FALSE;
1c79356b
A
424 m->unusual = FALSE;
425 m->restart = FALSE;
9bccf70c 426 m->zero_fill = FALSE;
91447636 427 m->encrypted = FALSE;
2d21ac55
A
428 m->encrypted_cleaning = FALSE;
429 m->deactivated = FALSE;
1c79356b 430
55e303ae 431 m->phys_page = 0; /* reset later */
1c79356b 432
1c79356b
A
433 /*
434 * Initialize the page queues.
435 */
436
91447636
A
437 mutex_init(&vm_page_queue_free_lock, 0);
438 mutex_init(&vm_page_queue_lock, 0);
1c79356b 439
2d21ac55
A
440 mutex_init(&vm_purgeable_queue_lock, 0);
441
442 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
443 int group;
444
445 purgeable_queues[i].token_q_head = 0;
446 purgeable_queues[i].token_q_tail = 0;
447 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
448 queue_init(&purgeable_queues[i].objq[group]);
449
450 purgeable_queues[i].type = i;
451 purgeable_queues[i].new_pages = 0;
452#if MACH_ASSERT
453 purgeable_queues[i].debug_count_tokens = 0;
454 purgeable_queues[i].debug_count_objects = 0;
455#endif
456 };
457
458 for (i = 0; i < MAX_COLORS; i++ )
459 queue_init(&vm_page_queue_free[i]);
460 queue_init(&vm_lopage_queue_free);
1c79356b
A
461 vm_page_queue_fictitious = VM_PAGE_NULL;
462 queue_init(&vm_page_queue_active);
463 queue_init(&vm_page_queue_inactive);
2d21ac55 464 queue_init(&vm_page_queue_throttled);
9bccf70c 465 queue_init(&vm_page_queue_zf);
1c79356b 466
2d21ac55
A
467 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
468 queue_init(&vm_page_queue_speculative[i].age_q);
469
470 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
471 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
472 }
1c79356b 473 vm_page_free_wanted = 0;
2d21ac55
A
474 vm_page_free_wanted_privileged = 0;
475
476 vm_page_set_colors();
477
1c79356b
A
478
479 /*
480 * Steal memory for the map and zone subsystems.
481 */
482
483 vm_map_steal_memory();
484 zone_steal_memory();
485
486 /*
487 * Allocate (and initialize) the virtual-to-physical
488 * table hash buckets.
489 *
490 * The number of buckets should be a power of two to
491 * get a good hash function. The following computation
 492 * chooses the first power of two that is at least as
 493 * large as the number of physical pages in the system.
494 */
495
91447636 496 simple_lock_init(&vm_page_bucket_lock, 0);
1c79356b
A
497
498 if (vm_page_bucket_count == 0) {
499 unsigned int npages = pmap_free_pages();
500
501 vm_page_bucket_count = 1;
502 while (vm_page_bucket_count < npages)
503 vm_page_bucket_count <<= 1;
504 }
505
506 vm_page_hash_mask = vm_page_bucket_count - 1;
507
508 /*
509 * Calculate object shift value for hashing algorithm:
510 * O = log2(sizeof(struct vm_object))
511 * B = log2(vm_page_bucket_count)
512 * hash shifts the object left by
513 * B/2 - O
514 */
515 size = vm_page_bucket_count;
516 for (log1 = 0; size > 1; log1++)
517 size /= 2;
518 size = sizeof(struct vm_object);
519 for (log2 = 0; size > 1; log2++)
520 size /= 2;
521 vm_page_hash_shift = log1/2 - log2 + 1;
55e303ae
A
522
523 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
524 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
 525 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure a unique series */
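/*
 * Worked example with illustrative numbers: on a machine with 200000
 * physical pages, the loop above settles on vm_page_bucket_count =
 * 262144 (2^18), so vm_page_hash_mask = 0x3ffff and log1 = 18.  The
 * bucket hash then becomes
 * (1 << ((18+1) >> 1)) | (1 << ((18+1) >> 2)) | 1 = 0x200 | 0x10 | 0x1 = 0x211.
 */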
1c79356b
A
526
527 if (vm_page_hash_mask & vm_page_bucket_count)
528 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
529
530 vm_page_buckets = (vm_page_bucket_t *)
531 pmap_steal_memory(vm_page_bucket_count *
532 sizeof(vm_page_bucket_t));
533
534 for (i = 0; i < vm_page_bucket_count; i++) {
535 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
536
537 bucket->pages = VM_PAGE_NULL;
538#if MACH_PAGE_HASH_STATS
539 bucket->cur_count = 0;
540 bucket->hi_count = 0;
541#endif /* MACH_PAGE_HASH_STATS */
542 }
543
544 /*
545 * Machine-dependent code allocates the resident page table.
546 * It uses vm_page_init to initialize the page frames.
547 * The code also returns to us the virtual space available
548 * to the kernel. We don't trust the pmap module
549 * to get the alignment right.
550 */
551
552 pmap_startup(&virtual_space_start, &virtual_space_end);
91447636
A
553 virtual_space_start = round_page(virtual_space_start);
554 virtual_space_end = trunc_page(virtual_space_end);
1c79356b
A
555
556 *startp = virtual_space_start;
557 *endp = virtual_space_end;
558
559 /*
560 * Compute the initial "wire" count.
561 * Up until now, the pages which have been set aside are not under
562 * the VM system's control, so although they aren't explicitly
563 * wired, they nonetheless can't be moved. At this moment,
564 * all VM managed pages are "free", courtesy of pmap_startup.
565 */
55e303ae 566 vm_page_wire_count = atop_64(max_mem) - vm_page_free_count; /* initial value */
1c79356b 567 vm_page_free_count_minimum = vm_page_free_count;
91447636 568
2d21ac55
A
569 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
570 vm_page_free_count, vm_page_wire_count);
571
91447636 572 simple_lock_init(&vm_paging_lock, 0);
1c79356b
A
573}
574
575#ifndef MACHINE_PAGES
576/*
577 * We implement pmap_steal_memory and pmap_startup with the help
578 * of two simpler functions, pmap_virtual_space and pmap_next_page.
579 */
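/*
 * For reference, the contracts assumed of those helpers, written as the
 * prototypes implied by their use below (signatures inferred, not
 * restated from pmap.h): pmap_virtual_space() reports the still-unused
 * kernel virtual range, and pmap_next_page() hands back the physical
 * page number of one remaining free page, returning FALSE when physical
 * memory is exhausted.
 */
#if 0
extern void		pmap_virtual_space(vm_offset_t *startp, vm_offset_t *endp);
extern boolean_t	pmap_next_page(ppnum_t *pnum);
#endif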
580
91447636 581void *
1c79356b
A
582pmap_steal_memory(
583 vm_size_t size)
584{
55e303ae
A
585 vm_offset_t addr, vaddr;
586 ppnum_t phys_page;
1c79356b
A
587
588 /*
 589 * We round the size up to a multiple of the pointer size.
590 */
591
592 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
593
594 /*
595 * If this is the first call to pmap_steal_memory,
596 * we have to initialize ourself.
597 */
598
599 if (virtual_space_start == virtual_space_end) {
600 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
601
602 /*
603 * The initial values must be aligned properly, and
604 * we don't trust the pmap module to do it right.
605 */
606
91447636
A
607 virtual_space_start = round_page(virtual_space_start);
608 virtual_space_end = trunc_page(virtual_space_end);
1c79356b
A
609 }
610
611 /*
612 * Allocate virtual memory for this request.
613 */
614
615 addr = virtual_space_start;
616 virtual_space_start += size;
617
618 kprintf("pmap_steal_memory: %08X - %08X; size=%08X\n", addr, virtual_space_start, size); /* (TEST/DEBUG) */
619
620 /*
621 * Allocate and map physical pages to back new virtual pages.
622 */
623
91447636 624 for (vaddr = round_page(addr);
1c79356b
A
625 vaddr < addr + size;
626 vaddr += PAGE_SIZE) {
55e303ae 627 if (!pmap_next_page(&phys_page))
1c79356b
A
628 panic("pmap_steal_memory");
629
630 /*
631 * XXX Logically, these mappings should be wired,
632 * but some pmap modules barf if they are.
633 */
634
55e303ae 635 pmap_enter(kernel_pmap, vaddr, phys_page,
9bccf70c
A
636 VM_PROT_READ|VM_PROT_WRITE,
637 VM_WIMG_USE_DEFAULT, FALSE);
1c79356b
A
638 /*
639 * Account for newly stolen memory
640 */
641 vm_page_wire_count++;
642
643 }
644
91447636 645 return (void *) addr;
1c79356b
A
646}
647
648void
649pmap_startup(
650 vm_offset_t *startp,
651 vm_offset_t *endp)
652{
55e303ae 653 unsigned int i, npages, pages_initialized, fill, fillval;
55e303ae
A
654 ppnum_t phys_page;
655 addr64_t tmpaddr;
0c530ab8
A
656 unsigned int num_of_lopages = 0;
657 unsigned int last_index;
1c79356b
A
658
659 /*
660 * We calculate how many page frames we will have
661 * and then allocate the page structures in one chunk.
662 */
663
55e303ae
A
664 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
665 tmpaddr = tmpaddr + (addr64_t)(round_page_32(virtual_space_start) - virtual_space_start); /* Account for any slop */
2d21ac55 666 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many pages fit, leaving enough room to hold their vm_page_ts */
1c79356b 667
2d21ac55 668 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
1c79356b
A
669
670 /*
671 * Initialize the page frames.
672 */
1c79356b 673 for (i = 0, pages_initialized = 0; i < npages; i++) {
55e303ae 674 if (!pmap_next_page(&phys_page))
1c79356b
A
675 break;
676
2d21ac55 677 vm_page_init(&vm_pages[i], phys_page);
1c79356b
A
678 vm_page_pages++;
679 pages_initialized++;
680 }
2d21ac55 681 vm_pages_count = pages_initialized;
1c79356b 682
0c530ab8
A
683 /*
684 * Check if we want to initialize pages to a known value
685 */
686 fill = 0; /* Assume no fill */
593a1d5f 687 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
2d21ac55 688
0c530ab8
A
689
690 /*
 691 * if vm_lopage_poolsize is non-zero, then we need to reserve
 692 * a pool of pages whose addresses are less than 4G... this pool
 693 * is used by drivers whose hardware can't DMA beyond 32 bits...
 694 *
 695 * note that I'm assuming that the page list is ascending and
 696 * ordered with respect to the physical address
697 */
698 for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) {
699 vm_page_t m;
700
2d21ac55 701 m = &vm_pages[i];
0c530ab8
A
702
703 if (m->phys_page >= (1 << (32 - PAGE_SHIFT)))
704 panic("couldn't reserve the lopage pool: not enough lo pages\n");
705
706 if (m->phys_page < vm_lopage_poolend)
707 panic("couldn't reserve the lopage pool: page list out of order\n");
708
709 vm_lopage_poolend = m->phys_page;
710
711 if (vm_lopage_poolstart == 0)
712 vm_lopage_poolstart = m->phys_page;
713 else {
714 if (m->phys_page < vm_lopage_poolstart)
715 panic("couldn't reserve the lopage pool: page list out of order\n");
716 }
717
718 if (fill)
 719 fillPage(m->phys_page, fillval); /* Fill the page with a known value if requested at boot */
720
721 vm_page_release(m);
722 }
723 last_index = i;
724
725 // -debug code remove
726 if (2 == vm_himemory_mode) {
727 // free low -> high so high is preferred
728 for (i = last_index + 1; i <= pages_initialized; i++) {
2d21ac55
A
 729 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
730 vm_page_release(&vm_pages[i - 1]);
0c530ab8
A
731 }
732 }
733 else
734 // debug code remove-
735
1c79356b
A
736 /*
737 * Release pages in reverse order so that physical pages
738 * initially get allocated in ascending addresses. This keeps
739 * the devices (which must address physical memory) happy if
740 * they require several consecutive pages.
741 */
0c530ab8 742 for (i = pages_initialized; i > last_index; i--) {
2d21ac55
A
 743 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
744 vm_page_release(&vm_pages[i - 1]);
1c79356b
A
745 }
746
55e303ae
A
747#if 0
748 {
749 vm_page_t xx, xxo, xxl;
2d21ac55 750 int i, j, k, l;
55e303ae
A
751
752 j = 0; /* (BRINGUP) */
753 xxl = 0;
754
2d21ac55
A
755 for( i = 0; i < vm_colors; i++ ) {
756 queue_iterate(&vm_page_queue_free[i],
757 xx,
758 vm_page_t,
759 pageq) { /* BRINGUP */
760 j++; /* (BRINGUP) */
761 if(j > vm_page_free_count) { /* (BRINGUP) */
762 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
55e303ae 763 }
2d21ac55
A
764
765 l = vm_page_free_count - j; /* (BRINGUP) */
766 k = 0; /* (BRINGUP) */
767
768 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
769
770 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
771 k++;
772 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
773 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
774 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
775 }
776 }
777
778 xxl = xx;
55e303ae
A
779 }
780 }
781
782 if(j != vm_page_free_count) { /* (BRINGUP) */
783 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
784 }
785 }
786#endif
787
788
1c79356b
A
789 /*
790 * We have to re-align virtual_space_start,
791 * because pmap_steal_memory has been using it.
792 */
793
55e303ae 794 virtual_space_start = round_page_32(virtual_space_start);
1c79356b
A
795
796 *startp = virtual_space_start;
797 *endp = virtual_space_end;
798}
799#endif /* MACHINE_PAGES */
800
801/*
802 * Routine: vm_page_module_init
803 * Purpose:
804 * Second initialization pass, to be done after
805 * the basic VM system is ready.
806 */
807void
808vm_page_module_init(void)
809{
810 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
811 0, PAGE_SIZE, "vm pages");
812
813#if ZONE_DEBUG
814 zone_debug_disable(vm_page_zone);
815#endif /* ZONE_DEBUG */
816
817 zone_change(vm_page_zone, Z_EXPAND, FALSE);
818 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
819 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
820
821 /*
822 * Adjust zone statistics to account for the real pages allocated
823 * in vm_page_create(). [Q: is this really what we want?]
824 */
825 vm_page_zone->count += vm_page_pages;
826 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
827
91447636 828 mutex_init(&vm_page_alloc_lock, 0);
1c79356b
A
829}
830
831/*
832 * Routine: vm_page_create
833 * Purpose:
834 * After the VM system is up, machine-dependent code
835 * may stumble across more physical memory. For example,
836 * memory that it was reserving for a frame buffer.
837 * vm_page_create turns this memory into available pages.
838 */
839
840void
841vm_page_create(
55e303ae
A
842 ppnum_t start,
843 ppnum_t end)
1c79356b 844{
55e303ae
A
845 ppnum_t phys_page;
846 vm_page_t m;
1c79356b 847
55e303ae
A
848 for (phys_page = start;
849 phys_page < end;
850 phys_page++) {
1c79356b
A
851 while ((m = (vm_page_t) vm_page_grab_fictitious())
852 == VM_PAGE_NULL)
853 vm_page_more_fictitious();
854
55e303ae 855 vm_page_init(m, phys_page);
1c79356b
A
856 vm_page_pages++;
857 vm_page_release(m);
858 }
859}
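/*
 * Hypothetical caller sketch: platform code handing a reclaimed
 * physical range (for example, a relinquished frame buffer) back to the
 * VM system.  The helper and its arguments are placeholders; note that
 * the end page is exclusive, per the loop above.
 */
#if 0
static void
example_reclaim_range(ppnum_t first_page, ppnum_t page_count)
{
	vm_page_create(first_page, first_page + page_count);
}
#endif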
860
861/*
862 * vm_page_hash:
863 *
864 * Distributes the object/offset key pair among hash buckets.
865 *
55e303ae 866 * NOTE: The bucket count must be a power of 2
1c79356b
A
867 */
868#define vm_page_hash(object, offset) (\
55e303ae 869 ( (natural_t)((uint32_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1c79356b
A
870 & vm_page_hash_mask)
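/*
 * Illustrative expansion, reusing the example values from
 * vm_page_bootstrap() above (vm_page_bucket_hash = 0x211,
 * vm_page_hash_mask = 0x3ffff): a lookup at offset 0x5000 selects
 * bucket
 *
 *	((uint32_t)object * 0x211 + (atop_64(0x5000) ^ 0x211)) & 0x3ffff
 *	= ((uint32_t)object * 0x211 + 0x214) & 0x3ffff
 *
 * The multiply scatters distinct objects across the table, while the
 * XOR of the page index varies the bucket within a single object.
 */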
871
2d21ac55 872
1c79356b
A
873/*
874 * vm_page_insert: [ internal use only ]
875 *
876 * Inserts the given mem entry into the object/object-page
877 * table and object list.
878 *
879 * The object must be locked.
880 */
1c79356b
A
881void
882vm_page_insert(
2d21ac55
A
883 vm_page_t mem,
884 vm_object_t object,
885 vm_object_offset_t offset)
886{
887 vm_page_insert_internal(mem, object, offset, FALSE);
888}
889
890
4a3eedf9 891void
2d21ac55
A
892vm_page_insert_internal(
893 vm_page_t mem,
894 vm_object_t object,
895 vm_object_offset_t offset,
896 boolean_t queues_lock_held)
1c79356b
A
897{
898 register vm_page_bucket_t *bucket;
899
900 XPR(XPR_VM_PAGE,
901 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
902 (integer_t)object, (integer_t)offset, (integer_t)mem, 0,0);
903
904 VM_PAGE_CHECK(mem);
905
2d21ac55
A
906 if (object == vm_submap_object) {
907 /* the vm_submap_object is only a placeholder for submaps */
908 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
909 }
910
911 vm_object_lock_assert_exclusive(object);
912#if DEBUG
91447636
A
913 if (mem->tabled || mem->object != VM_OBJECT_NULL)
914 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
915 "already in (obj=%p,off=0x%llx)",
916 mem, object, offset, mem->object, mem->offset);
917#endif
1c79356b
A
918 assert(!object->internal || offset < object->size);
919
920 /* only insert "pageout" pages into "pageout" objects,
921 * and normal pages into normal objects */
922 assert(object->pageout == mem->pageout);
923
91447636
A
924 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
925
1c79356b
A
926 /*
927 * Record the object/offset pair in this page
928 */
929
930 mem->object = object;
931 mem->offset = offset;
932
933 /*
 934 * Insert it into the object/offset hash table
935 */
936
937 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
938 simple_lock(&vm_page_bucket_lock);
939 mem->next = bucket->pages;
940 bucket->pages = mem;
941#if MACH_PAGE_HASH_STATS
942 if (++bucket->cur_count > bucket->hi_count)
943 bucket->hi_count = bucket->cur_count;
944#endif /* MACH_PAGE_HASH_STATS */
945 simple_unlock(&vm_page_bucket_lock);
946
947 /*
948 * Now link into the object's list of backed pages.
949 */
950
91447636 951 VM_PAGE_INSERT(mem, object);
1c79356b
A
952 mem->tabled = TRUE;
953
954 /*
955 * Show that the object has one more resident page.
956 */
957
958 object->resident_page_count++;
91447636 959
593a1d5f 960 if (object->purgable == VM_PURGABLE_VOLATILE) {
2d21ac55
A
961 if (queues_lock_held == FALSE)
962 vm_page_lockspin_queues();
963
91447636 964 vm_page_purgeable_count++;
2d21ac55 965
593a1d5f
A
966 if (queues_lock_held == FALSE)
967 vm_page_unlock_queues();
968 } else if (object->purgable == VM_PURGABLE_EMPTY &&
969 mem->throttled) {
970 if (queues_lock_held == FALSE)
971 vm_page_lock_queues();
972 vm_page_deactivate(mem);
2d21ac55
A
973 if (queues_lock_held == FALSE)
974 vm_page_unlock_queues();
91447636 975 }
1c79356b
A
976}
977
978/*
979 * vm_page_replace:
980 *
981 * Exactly like vm_page_insert, except that we first
982 * remove any existing page at the given offset in object.
983 *
984 * The object and page queues must be locked.
985 */
986
987void
988vm_page_replace(
989 register vm_page_t mem,
990 register vm_object_t object,
991 register vm_object_offset_t offset)
992{
0c530ab8
A
993 vm_page_bucket_t *bucket;
994 vm_page_t found_m = VM_PAGE_NULL;
1c79356b
A
995
996 VM_PAGE_CHECK(mem);
2d21ac55 997 vm_object_lock_assert_exclusive(object);
91447636 998#if DEBUG
91447636
A
999 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
1000
1001 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1002 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1003 "already in (obj=%p,off=0x%llx)",
1004 mem, object, offset, mem->object, mem->offset);
1005#endif
1c79356b
A
1006 /*
1007 * Record the object/offset pair in this page
1008 */
1009
1010 mem->object = object;
1011 mem->offset = offset;
1012
1013 /*
 1014 * Insert it into the object/offset hash table,
1015 * replacing any page that might have been there.
1016 */
1017
1018 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
1019 simple_lock(&vm_page_bucket_lock);
0c530ab8 1020
1c79356b
A
1021 if (bucket->pages) {
1022 vm_page_t *mp = &bucket->pages;
1023 register vm_page_t m = *mp;
0c530ab8 1024
1c79356b
A
1025 do {
1026 if (m->object == object && m->offset == offset) {
1027 /*
0c530ab8 1028 * Remove old page from hash list
1c79356b
A
1029 */
1030 *mp = m->next;
1c79356b 1031
0c530ab8 1032 found_m = m;
1c79356b
A
1033 break;
1034 }
1035 mp = &m->next;
91447636 1036 } while ((m = *mp));
0c530ab8 1037
1c79356b
A
1038 mem->next = bucket->pages;
1039 } else {
1040 mem->next = VM_PAGE_NULL;
1041 }
0c530ab8
A
1042 /*
1043 * insert new page at head of hash list
1044 */
1c79356b 1045 bucket->pages = mem;
0c530ab8 1046
1c79356b
A
1047 simple_unlock(&vm_page_bucket_lock);
1048
0c530ab8
A
1049 if (found_m) {
1050 /*
1051 * there was already a page at the specified
1052 * offset for this object... remove it from
1053 * the object and free it back to the free list
1054 */
1055 VM_PAGE_REMOVE(found_m);
1056 found_m->tabled = FALSE;
1057
1058 found_m->object = VM_OBJECT_NULL;
1059 found_m->offset = (vm_object_offset_t) -1;
1060 object->resident_page_count--;
1061
593a1d5f 1062 if (object->purgable == VM_PURGABLE_VOLATILE) {
0c530ab8
A
1063 assert(vm_page_purgeable_count > 0);
1064 vm_page_purgeable_count--;
1065 }
1066
1067 /*
1068 * Return page to the free list.
1069 * Note the page is not tabled now
1070 */
1071 vm_page_free(found_m);
1072 }
1c79356b
A
1073 /*
1074 * Now link into the object's list of backed pages.
1075 */
1076
91447636 1077 VM_PAGE_INSERT(mem, object);
1c79356b
A
1078 mem->tabled = TRUE;
1079
1080 /*
1081 * And show that the object has one more resident
1082 * page.
1083 */
1084
1085 object->resident_page_count++;
91447636 1086
593a1d5f 1087 if (object->purgable == VM_PURGABLE_VOLATILE) {
91447636 1088 vm_page_purgeable_count++;
593a1d5f
A
1089 } else if (object->purgable == VM_PURGABLE_EMPTY) {
1090 if (mem->throttled) {
1091 vm_page_deactivate(mem);
1092 }
91447636 1093 }
1c79356b
A
1094}
1095
1096/*
1097 * vm_page_remove: [ internal use only ]
1098 *
1099 * Removes the given mem entry from the object/offset-page
1100 * table and the object page list.
1101 *
91447636 1102 * The object and page queues must be locked.
1c79356b
A
1103 */
1104
1105void
1106vm_page_remove(
1107 register vm_page_t mem)
1108{
1109 register vm_page_bucket_t *bucket;
1110 register vm_page_t this;
1111
1112 XPR(XPR_VM_PAGE,
1113 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1114 (integer_t)mem->object, (integer_t)mem->offset,
1115 (integer_t)mem, 0,0);
91447636
A
1116#if DEBUG
1117 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
91447636 1118#endif
2d21ac55 1119 vm_object_lock_assert_exclusive(mem->object);
1c79356b
A
1120 assert(mem->tabled);
1121 assert(!mem->cleaning);
1122 VM_PAGE_CHECK(mem);
1123
91447636 1124
1c79356b
A
1125 /*
 1126 * Remove from the object/offset hash table
1127 */
1128
1129 bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
1130 simple_lock(&vm_page_bucket_lock);
1131 if ((this = bucket->pages) == mem) {
1132 /* optimize for common case */
1133
1134 bucket->pages = mem->next;
1135 } else {
1136 register vm_page_t *prev;
1137
1138 for (prev = &this->next;
1139 (this = *prev) != mem;
1140 prev = &this->next)
1141 continue;
1142 *prev = this->next;
1143 }
1144#if MACH_PAGE_HASH_STATS
1145 bucket->cur_count--;
1146#endif /* MACH_PAGE_HASH_STATS */
1147 simple_unlock(&vm_page_bucket_lock);
1148
1149 /*
1150 * Now remove from the object's list of backed pages.
1151 */
1152
91447636 1153 VM_PAGE_REMOVE(mem);
1c79356b
A
1154
1155 /*
1156 * And show that the object has one fewer resident
1157 * page.
1158 */
1159
1160 mem->object->resident_page_count--;
1161
593a1d5f 1162 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
91447636
A
1163 assert(vm_page_purgeable_count > 0);
1164 vm_page_purgeable_count--;
1165 }
1c79356b
A
1166 mem->tabled = FALSE;
1167 mem->object = VM_OBJECT_NULL;
91447636 1168 mem->offset = (vm_object_offset_t) -1;
1c79356b
A
1169}
1170
1171/*
1172 * vm_page_lookup:
1173 *
1174 * Returns the page associated with the object/offset
1175 * pair specified; if none is found, VM_PAGE_NULL is returned.
1176 *
1177 * The object must be locked. No side effects.
1178 */
1179
91447636
A
1180unsigned long vm_page_lookup_hint = 0;
1181unsigned long vm_page_lookup_hint_next = 0;
1182unsigned long vm_page_lookup_hint_prev = 0;
1183unsigned long vm_page_lookup_hint_miss = 0;
2d21ac55
A
1184unsigned long vm_page_lookup_bucket_NULL = 0;
1185unsigned long vm_page_lookup_miss = 0;
1186
91447636 1187
1c79356b
A
1188vm_page_t
1189vm_page_lookup(
1190 register vm_object_t object,
1191 register vm_object_offset_t offset)
1192{
1193 register vm_page_t mem;
1194 register vm_page_bucket_t *bucket;
91447636 1195 queue_entry_t qe;
91447636 1196
2d21ac55 1197 vm_object_lock_assert_held(object);
91447636 1198 mem = object->memq_hint;
2d21ac55 1199
91447636
A
1200 if (mem != VM_PAGE_NULL) {
1201 assert(mem->object == object);
2d21ac55 1202
91447636
A
1203 if (mem->offset == offset) {
1204 vm_page_lookup_hint++;
1205 return mem;
1206 }
1207 qe = queue_next(&mem->listq);
2d21ac55 1208
91447636
A
1209 if (! queue_end(&object->memq, qe)) {
1210 vm_page_t next_page;
1211
1212 next_page = (vm_page_t) qe;
1213 assert(next_page->object == object);
2d21ac55 1214
91447636
A
1215 if (next_page->offset == offset) {
1216 vm_page_lookup_hint_next++;
1217 object->memq_hint = next_page; /* new hint */
1218 return next_page;
1219 }
1220 }
1221 qe = queue_prev(&mem->listq);
2d21ac55 1222
91447636
A
1223 if (! queue_end(&object->memq, qe)) {
1224 vm_page_t prev_page;
1225
1226 prev_page = (vm_page_t) qe;
1227 assert(prev_page->object == object);
2d21ac55 1228
91447636
A
1229 if (prev_page->offset == offset) {
1230 vm_page_lookup_hint_prev++;
1231 object->memq_hint = prev_page; /* new hint */
1232 return prev_page;
1233 }
1234 }
1235 }
1c79356b 1236 /*
2d21ac55 1237 * Search the hash table for this object/offset pair
1c79356b 1238 */
1c79356b
A
1239 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
1240
2d21ac55
A
1241 /*
1242 * since we hold the object lock, we are guaranteed that no
1243 * new pages can be inserted into this object... this in turn
 1244 * guarantees that the page we're looking for can't exist
 1245 * if the bucket it hashes to is currently NULL even when looked
 1246 * at outside the scope of the hash bucket lock... this is a
 1247 * really cheap optimization to avoid taking the lock
1248 */
1249 if (bucket->pages == VM_PAGE_NULL) {
1250 vm_page_lookup_bucket_NULL++;
1251
1252 return (VM_PAGE_NULL);
1253 }
1c79356b 1254 simple_lock(&vm_page_bucket_lock);
0c530ab8 1255
1c79356b
A
1256 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1257 VM_PAGE_CHECK(mem);
1258 if ((mem->object == object) && (mem->offset == offset))
1259 break;
1260 }
1261 simple_unlock(&vm_page_bucket_lock);
55e303ae 1262
91447636
A
1263 if (mem != VM_PAGE_NULL) {
1264 if (object->memq_hint != VM_PAGE_NULL) {
1265 vm_page_lookup_hint_miss++;
1266 }
1267 assert(mem->object == object);
1268 object->memq_hint = mem;
2d21ac55
A
1269 } else
1270 vm_page_lookup_miss++;
91447636
A
1271
1272 return(mem);
1273}
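/*
 * Typical caller pattern, sketched with a hypothetical helper and the
 * usual vm_object_lock()/vm_object_unlock() interfaces; the object lock
 * must be held across the lookup, as asserted above.
 */
#if 0
static boolean_t
example_offset_is_resident(vm_object_t object, vm_object_offset_t offset)
{
	boolean_t	resident;

	vm_object_lock(object);
	resident = (vm_page_lookup(object, offset) != VM_PAGE_NULL);
	vm_object_unlock(object);

	return resident;
}
#endif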
1274
1275
1c79356b
A
1276/*
1277 * vm_page_rename:
1278 *
1279 * Move the given memory entry from its
1280 * current object to the specified target object/offset.
1281 *
1282 * The object must be locked.
1283 */
1284void
1285vm_page_rename(
1286 register vm_page_t mem,
1287 register vm_object_t new_object,
2d21ac55
A
1288 vm_object_offset_t new_offset,
1289 boolean_t encrypted_ok)
1c79356b
A
1290{
1291 assert(mem->object != new_object);
2d21ac55 1292
91447636
A
1293 /*
1294 * ENCRYPTED SWAP:
1295 * The encryption key is based on the page's memory object
1296 * (aka "pager") and paging offset. Moving the page to
1297 * another VM object changes its "pager" and "paging_offset"
2d21ac55
A
1298 * so it has to be decrypted first, or we would lose the key.
1299 *
1300 * One exception is VM object collapsing, where we transfer pages
1301 * from one backing object to its parent object. This operation also
1302 * transfers the paging information, so the <pager,paging_offset> info
1303 * should remain consistent. The caller (vm_object_do_collapse())
1304 * sets "encrypted_ok" in this case.
91447636 1305 */
2d21ac55 1306 if (!encrypted_ok && mem->encrypted) {
91447636
A
1307 panic("vm_page_rename: page %p is encrypted\n", mem);
1308 }
2d21ac55 1309
1c79356b
A
1310 /*
1311 * Changes to mem->object require the page lock because
1312 * the pageout daemon uses that lock to get the object.
1313 */
1314
1315 XPR(XPR_VM_PAGE,
1316 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1317 (integer_t)new_object, (integer_t)new_offset,
1318 (integer_t)mem, 0,0);
1319
2d21ac55 1320 vm_page_lockspin_queues();
1c79356b
A
1321 vm_page_remove(mem);
1322 vm_page_insert(mem, new_object, new_offset);
1323 vm_page_unlock_queues();
1324}
1325
1326/*
1327 * vm_page_init:
1328 *
1329 * Initialize the fields in a new page.
1330 * This takes a structure with random values and initializes it
1331 * so that it can be given to vm_page_release or vm_page_insert.
1332 */
1333void
1334vm_page_init(
1335 vm_page_t mem,
55e303ae 1336 ppnum_t phys_page)
1c79356b 1337{
91447636 1338 assert(phys_page);
1c79356b 1339 *mem = vm_page_template;
55e303ae 1340 mem->phys_page = phys_page;
1c79356b
A
1341}
1342
1343/*
1344 * vm_page_grab_fictitious:
1345 *
1346 * Remove a fictitious page from the free list.
1347 * Returns VM_PAGE_NULL if there are no free pages.
1348 */
1349int c_vm_page_grab_fictitious = 0;
1350int c_vm_page_release_fictitious = 0;
1351int c_vm_page_more_fictitious = 0;
1352
2d21ac55
A
1353extern vm_page_t vm_page_grab_fictitious_common(vm_offset_t phys_addr);
1354
1c79356b 1355vm_page_t
2d21ac55
A
1356vm_page_grab_fictitious_common(
1357 vm_offset_t phys_addr)
1c79356b
A
1358{
1359 register vm_page_t m;
1360
1361 m = (vm_page_t)zget(vm_page_zone);
1362 if (m) {
2d21ac55 1363 vm_page_init(m, phys_addr);
1c79356b 1364 m->fictitious = TRUE;
1c79356b
A
1365 }
1366
1367 c_vm_page_grab_fictitious++;
1368 return m;
1369}
1370
2d21ac55
A
1371vm_page_t
1372vm_page_grab_fictitious(void)
1373{
1374 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1375}
1376
1377vm_page_t
1378vm_page_grab_guard(void)
1379{
1380 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1381}
1382
1c79356b
A
1383/*
1384 * vm_page_release_fictitious:
1385 *
1386 * Release a fictitious page to the free list.
1387 */
1388
1389void
1390vm_page_release_fictitious(
1391 register vm_page_t m)
1392{
1393 assert(!m->free);
1394 assert(m->busy);
1395 assert(m->fictitious);
2d21ac55
A
1396 assert(m->phys_page == vm_page_fictitious_addr ||
1397 m->phys_page == vm_page_guard_addr);
1c79356b
A
1398
1399 c_vm_page_release_fictitious++;
91447636 1400#if DEBUG
1c79356b
A
1401 if (m->free)
1402 panic("vm_page_release_fictitious");
91447636 1403#endif
1c79356b 1404 m->free = TRUE;
91447636 1405 zfree(vm_page_zone, m);
1c79356b
A
1406}
1407
1408/*
1409 * vm_page_more_fictitious:
1410 *
1411 * Add more fictitious pages to the free list.
1412 * Allowed to block. This routine is way intimate
1413 * with the zones code, for several reasons:
1414 * 1. we need to carve some page structures out of physical
1415 * memory before zones work, so they _cannot_ come from
1416 * the zone_map.
1417 * 2. the zone needs to be collectable in order to prevent
1418 * growth without bound. These structures are used by
1419 * the device pager (by the hundreds and thousands), as
1420 * private pages for pageout, and as blocking pages for
1421 * pagein. Temporary bursts in demand should not result in
1422 * permanent allocation of a resource.
1423 * 3. To smooth allocation humps, we allocate single pages
1424 * with kernel_memory_allocate(), and cram them into the
1425 * zone. This also allows us to initialize the vm_page_t's
1426 * on the way into the zone, so that zget() always returns
1427 * an initialized structure. The zone free element pointer
1428 * and the free page pointer are both the first item in the
1429 * vm_page_t.
1430 * 4. By having the pages in the zone pre-initialized, we need
1431 * not keep 2 levels of lists. The garbage collector simply
1432 * scans our list, and reduces physical memory usage as it
1433 * sees fit.
1434 */
1435
1436void vm_page_more_fictitious(void)
1437{
1c79356b
A
1438 register vm_page_t m;
1439 vm_offset_t addr;
1440 kern_return_t retval;
1441 int i;
1442
1443 c_vm_page_more_fictitious++;
1444
1c79356b
A
1445 /*
1446 * Allocate a single page from the zone_map. Do not wait if no physical
1447 * pages are immediately available, and do not zero the space. We need
1448 * our own blocking lock here to prevent having multiple,
1449 * simultaneous requests from piling up on the zone_map lock. Exactly
1450 * one (of our) threads should be potentially waiting on the map lock.
 1451 * If the winner is not vm-privileged, then the page allocation will fail,
1452 * and it will temporarily block here in the vm_page_wait().
1453 */
1454 mutex_lock(&vm_page_alloc_lock);
1455 /*
1456 * If another thread allocated space, just bail out now.
1457 */
1458 if (zone_free_count(vm_page_zone) > 5) {
1459 /*
1460 * The number "5" is a small number that is larger than the
1461 * number of fictitious pages that any single caller will
1462 * attempt to allocate. Otherwise, a thread will attempt to
1463 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1464 * release all of the resources and locks already acquired,
1465 * and then call this routine. This routine finds the pages
1466 * that the caller released, so fails to allocate new space.
1467 * The process repeats infinitely. The largest known number
1468 * of fictitious pages required in this manner is 2. 5 is
1469 * simply a somewhat larger number.
1470 */
1471 mutex_unlock(&vm_page_alloc_lock);
1472 return;
1473 }
1474
91447636
A
1475 retval = kernel_memory_allocate(zone_map,
1476 &addr, PAGE_SIZE, VM_PROT_ALL,
1477 KMA_KOBJECT|KMA_NOPAGEWAIT);
1478 if (retval != KERN_SUCCESS) {
1c79356b
A
1479 /*
1480 * No page was available. Tell the pageout daemon, drop the
1481 * lock to give another thread a chance at it, and
1482 * wait for the pageout daemon to make progress.
1483 */
1484 mutex_unlock(&vm_page_alloc_lock);
1485 vm_page_wait(THREAD_UNINT);
1486 return;
1487 }
1488 /*
1489 * Initialize as many vm_page_t's as will fit on this page. This
1490 * depends on the zone code disturbing ONLY the first item of
1491 * each zone element.
1492 */
1493 m = (vm_page_t)addr;
1494 for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
1495 vm_page_init(m, vm_page_fictitious_addr);
1496 m->fictitious = TRUE;
1497 m++;
1498 }
91447636 1499 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1c79356b
A
1500 mutex_unlock(&vm_page_alloc_lock);
1501}
1502
1c79356b
A
1503
1504/*
1505 * vm_pool_low():
1506 *
1507 * Return true if it is not likely that a non-vm_privileged thread
1508 * can get memory without blocking. Advisory only, since the
1509 * situation may change under us.
1510 */
1511int
1512vm_pool_low(void)
1513{
1514 /* No locking, at worst we will fib. */
1515 return( vm_page_free_count < vm_page_free_reserved );
1516}
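/*
 * Sketch of the intended use (hypothetical helper name): a caller whose
 * allocation is optional can back off when the pool looks low rather
 * than pushing toward the reserved pages.  Advisory only, as noted
 * above.
 */
#if 0
static vm_page_t
example_optional_grab(void)
{
	if (vm_pool_low())
		return VM_PAGE_NULL;	/* defer the optional work while memory is tight */

	return vm_page_grab();
}
#endif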
1517
0c530ab8
A
1518
1519
1520/*
1521 * this is an interface to support bring-up of drivers
1522 * on platforms with physical memory > 4G...
1523 */
1524int vm_himemory_mode = 0;
1525
1526
1527/*
1528 * this interface exists to support hardware controllers
1529 * incapable of generating DMAs with more than 32 bits
1530 * of address on platforms with physical memory > 4G...
1531 */
1532unsigned int vm_lopage_free_count = 0;
1533unsigned int vm_lopage_max_count = 0;
2d21ac55 1534queue_head_t vm_lopage_queue_free;
0c530ab8
A
1535
1536vm_page_t
1537vm_page_grablo(void)
1538{
1539 register vm_page_t mem;
1540 unsigned int vm_lopage_alloc_count;
1541
1542 if (vm_lopage_poolsize == 0)
1543 return (vm_page_grab());
1544
1545 mutex_lock(&vm_page_queue_free_lock);
1546
2d21ac55
A
1547 if (! queue_empty(&vm_lopage_queue_free)) {
1548 queue_remove_first(&vm_lopage_queue_free,
1549 mem,
1550 vm_page_t,
1551 pageq);
1552 assert(mem->free);
1553 assert(mem->busy);
1554 assert(!mem->pmapped);
4a3eedf9 1555 assert(!mem->wpmapped);
0c530ab8 1556
0c530ab8
A
1557 mem->pageq.next = NULL;
1558 mem->pageq.prev = NULL;
1559 mem->free = FALSE;
0c530ab8
A
1560
1561 vm_lopage_free_count--;
1562 vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count;
1563 if (vm_lopage_alloc_count > vm_lopage_max_count)
1564 vm_lopage_max_count = vm_lopage_alloc_count;
2d21ac55
A
1565 } else {
1566 mem = VM_PAGE_NULL;
0c530ab8
A
1567 }
1568 mutex_unlock(&vm_page_queue_free_lock);
1569
1570 return (mem);
1571}
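/*
 * Hypothetical driver-side sketch: a controller that cannot DMA above
 * 4G takes its buffer pages from the low pool, everything else uses the
 * normal allocator.  The helper and its flag are placeholders.
 */
#if 0
static vm_page_t
example_grab_page_for_dma(boolean_t needs_32bit_addressing)
{
	if (needs_32bit_addressing)
		return vm_page_grablo();	/* physical address guaranteed below 4G */

	return vm_page_grab();
}
#endif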
1572
1573
1c79356b
A
1574/*
1575 * vm_page_grab:
1576 *
2d21ac55
A
1577 * first try to grab a page from the per-cpu free list...
1578 * this must be done while pre-emption is disabled... if
1579 * a page is available, we're done...
1580 * if no page is available, grab the vm_page_queue_free_lock
1581 * and see if current number of free pages would allow us
1582 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1583 * if there are pages available, disable preemption and
1584 * recheck the state of the per-cpu free list... we could
1585 * have been preempted and moved to a different cpu, or
1586 * some other thread could have re-filled it... if still
1587 * empty, figure out how many pages we can steal from the
1588 * global free queue and move to the per-cpu queue...
1589 * return 1 of these pages when done... only wakeup the
1590 * pageout_scan thread if we moved pages from the global
1591 * list... no need for the wakeup if we've satisfied the
1592 * request from the per-cpu queue.
1c79356b
A
1593 */
1594
2d21ac55
A
1595#define COLOR_GROUPS_TO_STEAL 4
1596
1c79356b
A
1597
1598vm_page_t
2d21ac55 1599vm_page_grab( void )
1c79356b 1600{
2d21ac55
A
1601 vm_page_t mem;
1602
1603
1604 disable_preemption();
1605
1606 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1607return_page_from_cpu_list:
1608 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1609 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1610 mem->pageq.next = NULL;
1611
1612 enable_preemption();
1613
1614 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1615 assert(mem->tabled == FALSE);
1616 assert(mem->object == VM_OBJECT_NULL);
1617 assert(!mem->laundry);
1618 assert(!mem->free);
1619 assert(pmap_verify_free(mem->phys_page));
1620 assert(mem->busy);
1621 assert(!mem->encrypted);
1622 assert(!mem->pmapped);
4a3eedf9 1623 assert(!mem->wpmapped);
2d21ac55
A
1624
1625 return mem;
1626 }
1627 enable_preemption();
1628
1c79356b
A
1629
1630 mutex_lock(&vm_page_queue_free_lock);
1c79356b
A
1631
1632 /*
1633 * Optionally produce warnings if the wire or gobble
1634 * counts exceed some threshold.
1635 */
1636 if (vm_page_wire_count_warning > 0
1637 && vm_page_wire_count >= vm_page_wire_count_warning) {
1638 printf("mk: vm_page_grab(): high wired page count of %d\n",
1639 vm_page_wire_count);
1640 assert(vm_page_wire_count < vm_page_wire_count_warning);
1641 }
1642 if (vm_page_gobble_count_warning > 0
1643 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1644 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1645 vm_page_gobble_count);
1646 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1647 }
1648
1649 /*
1650 * Only let privileged threads (involved in pageout)
1651 * dip into the reserved pool.
1652 */
1c79356b 1653 if ((vm_page_free_count < vm_page_free_reserved) &&
91447636 1654 !(current_thread()->options & TH_OPT_VMPRIV)) {
1c79356b
A
1655 mutex_unlock(&vm_page_queue_free_lock);
1656 mem = VM_PAGE_NULL;
1c79356b 1657 }
2d21ac55
A
1658 else {
1659 vm_page_t head;
1660 vm_page_t tail;
1661 unsigned int pages_to_steal;
1662 unsigned int color;
1c79356b 1663
2d21ac55 1664 while ( vm_page_free_count == 0 ) {
1c79356b 1665
2d21ac55
A
1666 mutex_unlock(&vm_page_queue_free_lock);
1667 /*
1668 * must be a privileged thread to be
1669 * in this state since a non-privileged
1670 * thread would have bailed if we were
1671 * under the vm_page_free_reserved mark
1672 */
1673 VM_PAGE_WAIT();
1674 mutex_lock(&vm_page_queue_free_lock);
1675 }
1676
1677 disable_preemption();
1678
1679 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1680 mutex_unlock(&vm_page_queue_free_lock);
1681
1682 /*
1683 * we got preempted and moved to another processor
1684 * or we got preempted and someone else ran and filled the cache
1685 */
1686 goto return_page_from_cpu_list;
1687 }
1688 if (vm_page_free_count <= vm_page_free_reserved)
1689 pages_to_steal = 1;
1690 else {
1691 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1692
1693 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1694 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1695 }
1696 color = PROCESSOR_DATA(current_processor(), start_color);
1697 head = tail = NULL;
1698
1699 while (pages_to_steal--) {
1700 if (--vm_page_free_count < vm_page_free_count_minimum)
1701 vm_page_free_count_minimum = vm_page_free_count;
1702
1703 while (queue_empty(&vm_page_queue_free[color]))
1704 color = (color + 1) & vm_color_mask;
1705
1706 queue_remove_first(&vm_page_queue_free[color],
1707 mem,
1708 vm_page_t,
1709 pageq);
1710 mem->pageq.next = NULL;
1711 mem->pageq.prev = NULL;
1712
1713 color = (color + 1) & vm_color_mask;
1714
1715 if (head == NULL)
1716 head = mem;
1717 else
1718 tail->pageq.next = (queue_t)mem;
1719 tail = mem;
1720
1721 mem->pageq.prev = NULL;
1722 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1723 assert(mem->tabled == FALSE);
1724 assert(mem->object == VM_OBJECT_NULL);
1725 assert(!mem->laundry);
1726 assert(mem->free);
1727 mem->free = FALSE;
1728
1729 assert(pmap_verify_free(mem->phys_page));
1730 assert(mem->busy);
1731 assert(!mem->free);
1732 assert(!mem->encrypted);
1733 assert(!mem->pmapped);
4a3eedf9 1734 assert(!mem->wpmapped);
2d21ac55
A
1735 }
1736 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1737 PROCESSOR_DATA(current_processor(), start_color) = color;
1738
1739 /*
1740 * satisfy this request
1741 */
1742 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1743 mem = head;
1744 mem->pageq.next = NULL;
91447636 1745
2d21ac55
A
1746 mutex_unlock(&vm_page_queue_free_lock);
1747
1748 enable_preemption();
1749 }
1c79356b
A
1750 /*
1751 * Decide if we should poke the pageout daemon.
1752 * We do this if the free count is less than the low
1753 * water mark, or if the free count is less than the high
1754 * water mark (but above the low water mark) and the inactive
1755 * count is less than its target.
1756 *
1757 * We don't have the counts locked ... if they change a little,
1758 * it doesn't really matter.
1759 */
1c79356b
A
1760 if ((vm_page_free_count < vm_page_free_min) ||
1761 ((vm_page_free_count < vm_page_free_target) &&
2d21ac55
A
1762 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1763 thread_wakeup((event_t) &vm_page_free_wanted);
1764
1765#if CONFIG_EMBEDDED
1766 {
1767 int percent_avail;
1768
1769 /*
1770 * Decide if we need to poke the memorystatus notification thread.
1771 */
1772 percent_avail =
1773 (vm_page_active_count + vm_page_inactive_count +
1774 vm_page_speculative_count + vm_page_free_count +
cf7d32b8 1775 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2d21ac55
A
1776 atop_64(max_mem);
1777 if (percent_avail <= (kern_memorystatus_level - 5)) {
1778 kern_memorystatus_level = percent_avail;
1779 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1780 }
1781 }
1782#endif
1c79356b 1783
55e303ae 1784// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1c79356b
A
1785
1786 return mem;
1787}
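/*
 * Typical caller pattern (hypothetical helper name): vm_page_grab()
 * returns VM_PAGE_NULL rather than blocking, so callers that can sleep
 * wait for the pageout daemon and retry, just as other paths in this
 * file do with VM_PAGE_WAIT().
 */
#if 0
static vm_page_t
example_grab_page_blocking(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();		/* sleep until the free count rises */

	return m;
}
#endif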
1788
1789/*
1790 * vm_page_release:
1791 *
1792 * Return a page to the free list.
1793 */
1794
1795void
1796vm_page_release(
1797 register vm_page_t mem)
1798{
2d21ac55 1799 unsigned int color;
55e303ae
A
1800#if 0
1801 unsigned int pindex;
1802 phys_entry *physent;
1803
1804 physent = mapping_phys_lookup(mem->phys_page, &pindex); /* (BRINGUP) */
1805 if(physent->ppLink & ppN) { /* (BRINGUP) */
1806 panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
1807 }
1808 physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */
1809#endif
1c79356b
A
1810 assert(!mem->private && !mem->fictitious);
1811
55e303ae 1812// dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1c79356b
A
1813
1814 mutex_lock(&vm_page_queue_free_lock);
91447636 1815#if DEBUG
1c79356b
A
1816 if (mem->free)
1817 panic("vm_page_release");
91447636 1818#endif
1c79356b 1819 mem->free = TRUE;
2d21ac55
A
1820
1821 assert(mem->busy);
91447636
A
1822 assert(!mem->laundry);
1823 assert(mem->object == VM_OBJECT_NULL);
1824 assert(mem->pageq.next == NULL &&
1825 mem->pageq.prev == NULL);
2d21ac55
A
1826 assert(mem->listq.next == NULL &&
1827 mem->listq.prev == NULL);
1828
0c530ab8
A
1829 if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
1830 /*
1831 * this exists to support hardware controllers
1832 * incapable of generating DMAs with more than 32 bits
1833 * of address on platforms with physical memory > 4G...
1834 */
2d21ac55
A
1835 queue_enter_first(&vm_lopage_queue_free,
1836 mem,
1837 vm_page_t,
1838 pageq);
0c530ab8
A
1839 vm_lopage_free_count++;
1840 } else {
2d21ac55
A
1841 color = mem->phys_page & vm_color_mask;
1842 queue_enter_first(&vm_page_queue_free[color],
1843 mem,
1844 vm_page_t,
1845 pageq);
0c530ab8
A
1846 vm_page_free_count++;
1847 /*
 1848 * Check if we should wake up someone waiting for a page.
1849 * But don't bother waking them unless they can allocate.
1850 *
1851 * We wakeup only one thread, to prevent starvation.
1852 * Because the scheduling system handles wait queues FIFO,
1853 * if we wakeup all waiting threads, one greedy thread
1854 * can starve multiple niceguy threads. When the threads
 1855 * all wake up, the greedy thread runs first, grabs the page,
1856 * and waits for another page. It will be the first to run
1857 * when the next page is freed.
1858 *
1859 * However, there is a slight danger here.
1860 * The thread we wake might not use the free page.
1861 * Then the other threads could wait indefinitely
1862 * while the page goes unused. To forestall this,
1863 * the pageout daemon will keep making free pages
1864 * as long as vm_page_free_wanted is non-zero.
1865 */
1c79356b 1866
2d21ac55
A
1867 if ((vm_page_free_wanted_privileged > 0) && vm_page_free_count) {
1868 vm_page_free_wanted_privileged--;
1869 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
1870 } else if ((vm_page_free_wanted > 0) &&
1871 (vm_page_free_count >= vm_page_free_reserved)) {
0c530ab8
A
1872 vm_page_free_wanted--;
1873 thread_wakeup_one((event_t) &vm_page_free_count);
1874 }
1c79356b 1875 }
1c79356b 1876 mutex_unlock(&vm_page_queue_free_lock);
2d21ac55
A
1877
1878#if CONFIG_EMBEDDED
1879 {
1880 int percent_avail;
1881
1882 /*
1883 * Decide if we need to poke the memorystatus notification thread.
1884 * Locking is not a big issue, as only a single thread delivers these.
1885 */
1886 percent_avail =
1887 (vm_page_active_count + vm_page_inactive_count +
1888 vm_page_speculative_count + vm_page_free_count +
cf7d32b8 1889 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2d21ac55
A
1890 atop_64(max_mem);
1891 if (percent_avail >= (kern_memorystatus_level + 5)) {
1892 kern_memorystatus_level = percent_avail;
1893 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1894 }
1895 }
1896#endif
1c79356b
A
1897}
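/*
 * Editor-added illustrative sketch: the CONFIG_EMBEDDED memorystatus
 * calculation that appears in vm_page_grab(), vm_page_release() and
 * several other places in this file, expressed as a hypothetical pure
 * helper.  "total_pages" stands in for atop_64(max_mem); the purgeable
 * count is included only when no default pager is registered, as in
 * the original expression.
 */
static inline int
vm_page_percent_avail_sketch(
	unsigned int	active,
	unsigned int	inactive,
	unsigned int	speculative,
	unsigned int	free,
	unsigned int	purgeable,	/* pass 0 if a default pager is registered */
	uint64_t	total_pages)
{
	uint64_t avail = (uint64_t)active + inactive + speculative + free + purgeable;

	return (int)((avail * 100) / total_pages);
}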
1898
1c79356b
A
1899/*
1900 * vm_page_wait:
1901 *
1902 * Wait for a page to become available.
1903 * If there are plenty of free pages, then we don't sleep.
1904 *
1905 * Returns:
1906 * TRUE: There may be another page, try again
1907 * FALSE: We were interrupted out of our wait, don't try again
1908 */
1909
1910boolean_t
1911vm_page_wait(
1912 int interruptible )
1913{
1914 /*
1915 * We can't use vm_page_free_reserved to make this
1916 * determination. Consider: some thread might
1917 * need to allocate two pages. The first allocation
1918 * succeeds, the second fails. After the first page is freed,
1919 * a call to vm_page_wait must really block.
1920 */
9bccf70c 1921 kern_return_t wait_result;
9bccf70c 1922 int need_wakeup = 0;
2d21ac55 1923 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
1c79356b
A
1924
1925 mutex_lock(&vm_page_queue_free_lock);
2d21ac55
A
1926
1927 if (is_privileged && vm_page_free_count) {
1928 mutex_unlock(&vm_page_queue_free_lock);
1929 return TRUE;
1930 }
1c79356b 1931 if (vm_page_free_count < vm_page_free_target) {
2d21ac55
A
1932
1933 if (is_privileged) {
1934 if (vm_page_free_wanted_privileged++ == 0)
1935 need_wakeup = 1;
1936 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
1937 } else {
1938 if (vm_page_free_wanted++ == 0)
1939 need_wakeup = 1;
1940 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
1941 }
1c79356b
A
1942 mutex_unlock(&vm_page_queue_free_lock);
1943 counter(c_vm_page_wait_block++);
0b4e3aa0
A
1944
1945 if (need_wakeup)
1946 thread_wakeup((event_t)&vm_page_free_wanted);
9bccf70c 1947
91447636 1948 if (wait_result == THREAD_WAITING)
9bccf70c
A
1949 wait_result = thread_block(THREAD_CONTINUE_NULL);
1950
1c79356b
A
1951 return(wait_result == THREAD_AWAKENED);
1952 } else {
1953 mutex_unlock(&vm_page_queue_free_lock);
1954 return TRUE;
1955 }
1956}
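/*
 * Editor-added usage sketch: the canonical way callers pair
 * vm_page_grab() with vm_page_wait() when they must have a page and
 * are willing to block.  This hypothetical helper mirrors the retry
 * loop used by vm_page_part_zero_fill() later in this file.
 */
static vm_page_t
vm_page_grab_blocking(int interruptible)
{
	vm_page_t	m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (!vm_page_wait(interruptible))
			break;		/* interrupted out of the wait: give up */
	}
	return m;
}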
1957
1958/*
1959 * vm_page_alloc:
1960 *
1961 * Allocate and return a memory cell associated
1962 * with this VM object/offset pair.
1963 *
1964 * Object must be locked.
1965 */
1966
1967vm_page_t
1968vm_page_alloc(
1969 vm_object_t object,
1970 vm_object_offset_t offset)
1971{
1972 register vm_page_t mem;
1973
2d21ac55 1974 vm_object_lock_assert_exclusive(object);
1c79356b
A
1975 mem = vm_page_grab();
1976 if (mem == VM_PAGE_NULL)
1977 return VM_PAGE_NULL;
1978
1979 vm_page_insert(mem, object, offset);
1980
1981 return(mem);
1982}
1983
0c530ab8
A
1984vm_page_t
1985vm_page_alloclo(
1986 vm_object_t object,
1987 vm_object_offset_t offset)
1988{
1989 register vm_page_t mem;
1990
2d21ac55 1991 vm_object_lock_assert_exclusive(object);
0c530ab8
A
1992 mem = vm_page_grablo();
1993 if (mem == VM_PAGE_NULL)
1994 return VM_PAGE_NULL;
1995
1996 vm_page_insert(mem, object, offset);
1997
1998 return(mem);
1999}
2000
2001
2d21ac55
A
2002/*
2003 * vm_page_alloc_guard:
2004 *
 2005 * Allocate a fictitious page which will be used
2006 * as a guard page. The page will be inserted into
2007 * the object and returned to the caller.
2008 */
2009
2010vm_page_t
2011vm_page_alloc_guard(
2012 vm_object_t object,
2013 vm_object_offset_t offset)
2014{
2015 register vm_page_t mem;
2016
2017 vm_object_lock_assert_exclusive(object);
2018 mem = vm_page_grab_guard();
2019 if (mem == VM_PAGE_NULL)
2020 return VM_PAGE_NULL;
2021
2022 vm_page_insert(mem, object, offset);
2023
2024 return(mem);
2025}
2026
2027
1c79356b
A
2028counter(unsigned int c_laundry_pages_freed = 0;)
2029
91447636 2030boolean_t vm_page_free_verify = TRUE;
1c79356b
A
2031/*
2032 * vm_page_free:
2033 *
2034 * Returns the given page to the free list,
2035 * disassociating it with any VM object.
2036 *
2037 * Object and page queues must be locked prior to entry.
2038 */
2039void
2d21ac55 2040vm_page_free_prepare(
1c79356b
A
2041 register vm_page_t mem)
2042{
2d21ac55 2043 VM_PAGE_CHECK(mem);
1c79356b
A
2044 assert(!mem->free);
2045 assert(!mem->cleaning);
2046 assert(!mem->pageout);
2d21ac55
A
2047
2048#if DEBUG
91447636
A
2049 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2050 assert(pmap_verify_free(mem->phys_page));
2051 }
91447636 2052 if (mem->object)
2d21ac55 2053 vm_object_lock_assert_exclusive(mem->object);
91447636 2054 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
1c79356b 2055
91447636
A
2056 if (mem->free)
2057 panic("vm_page_free: freeing page on free list\n");
2058#endif
2d21ac55
A
2059
2060 if (mem->laundry) {
2061 /*
2062 * We may have to free a page while it's being laundered
2063 * if we lost its pager (due to a forced unmount, for example).
2064 * We need to call vm_pageout_throttle_up() before removing
2065 * the page from its VM object, so that we can find out on
2066 * which pageout queue the page is.
2067 */
2068 vm_pageout_throttle_up(mem);
2069 counter(++c_laundry_pages_freed);
2070 }
2071
1c79356b
A
2072 if (mem->tabled)
2073 vm_page_remove(mem); /* clears tabled, object, offset */
1c79356b 2074
2d21ac55 2075 VM_PAGE_QUEUES_REMOVE(mem); /* clears active/inactive/throttled/speculative */
1c79356b
A
2076
2077 if (mem->wire_count) {
2078 if (!mem->private && !mem->fictitious)
2079 vm_page_wire_count--;
2080 mem->wire_count = 0;
2081 assert(!mem->gobbled);
2082 } else if (mem->gobbled) {
2083 if (!mem->private && !mem->fictitious)
2084 vm_page_wire_count--;
2085 vm_page_gobble_count--;
2086 }
2087 mem->gobbled = FALSE;
2088
1c79356b
A
2089 PAGE_WAKEUP(mem); /* clears wanted */
2090
0b4e3aa0 2091 /* Some of these may be unnecessary */
1c79356b
A
2092 mem->busy = TRUE;
2093 mem->absent = FALSE;
2094 mem->error = FALSE;
2095 mem->dirty = FALSE;
2096 mem->precious = FALSE;
2097 mem->reference = FALSE;
91447636 2098 mem->encrypted = FALSE;
2d21ac55
A
2099 mem->encrypted_cleaning = FALSE;
2100 mem->deactivated = FALSE;
2101 mem->pmapped = FALSE;
4a3eedf9 2102 mem->wpmapped = FALSE;
1c79356b
A
2103
2104 if (mem->private) {
2105 mem->private = FALSE;
2106 mem->fictitious = TRUE;
55e303ae 2107 mem->phys_page = vm_page_fictitious_addr;
1c79356b 2108 }
2d21ac55
A
2109 if (!mem->fictitious) {
2110 if (mem->zero_fill == TRUE) {
9bccf70c 2111 mem->zero_fill = FALSE;
2d21ac55 2112 OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
9bccf70c 2113 }
55e303ae 2114 vm_page_init(mem, mem->phys_page);
1c79356b
A
2115 }
2116}
2117
2d21ac55
A
2118void
2119vm_page_free(
2120 vm_page_t mem)
2121{
2122 vm_page_free_prepare(mem);
2123 if (mem->fictitious) {
2124 vm_page_release_fictitious(mem);
2125 } else {
2126 vm_page_release(mem);
2127 }
2128}
55e303ae 2129
2d21ac55
A
2130/*
2131 * Free a list of pages. The list can be up to several hundred pages,
2132 * as blocked up by vm_pageout_scan().
2133 * The big win is not having to take the page q and free list locks once
2134 * per page. We sort the incoming pages into n lists, one for
2135 * each color.
2136 *
2137 * The page queues must be locked, and are kept locked.
2138 */
55e303ae
A
2139void
2140vm_page_free_list(
2d21ac55 2141 vm_page_t mem)
55e303ae 2142{
2d21ac55
A
2143 vm_page_t nxt;
2144 int pg_count = 0;
2145 int color;
2146 int inuse_list_head = -1;
2147
2148 queue_head_t free_list[MAX_COLORS];
2149 int inuse[MAX_COLORS];
55e303ae 2150
2d21ac55
A
2151 for (color = 0; color < (signed) vm_colors; color++) {
2152 queue_init(&free_list[color]);
2153 }
2154
91447636
A
2155#if DEBUG
2156 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2157#endif
55e303ae 2158 while (mem) {
91447636
A
2159#if DEBUG
2160 if (mem->tabled || mem->object)
2161 panic("vm_page_free_list: freeing tabled page\n");
2d21ac55 2162 if (mem->inactive || mem->active || mem->throttled || mem->free)
91447636 2163 panic("vm_page_free_list: freeing page on list\n");
2d21ac55
A
2164 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2165 assert(pmap_verify_free(mem->phys_page));
2166 }
91447636
A
2167#endif
2168 assert(mem->pageq.prev == NULL);
2d21ac55
A
2169 assert(mem->busy);
2170 assert(!mem->free);
55e303ae
A
2171 nxt = (vm_page_t)(mem->pageq.next);
2172
55e303ae 2173 if (!mem->fictitious) {
935ed37a
A
2174 if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
2175 mem->pageq.next = NULL;
2176 vm_page_release(mem);
2177 } else {
2178 mem->free = TRUE;
2179
2180 color = mem->phys_page & vm_color_mask;
2181 if (queue_empty(&free_list[color])) {
2182 inuse[color] = inuse_list_head;
2183 inuse_list_head = color;
2184 }
2185 queue_enter_first(&free_list[color],
2186 mem,
2187 vm_page_t,
2188 pageq);
2189 pg_count++;
2d21ac55 2190 }
55e303ae 2191 } else {
2d21ac55
A
2192 assert(mem->phys_page == vm_page_fictitious_addr ||
2193 mem->phys_page == vm_page_guard_addr);
55e303ae
A
2194 vm_page_release_fictitious(mem);
2195 }
2196 mem = nxt;
2197 }
2d21ac55
A
2198 if (pg_count) {
2199 unsigned int avail_free_count;
2200
55e303ae
A
2201 mutex_lock(&vm_page_queue_free_lock);
2202
2d21ac55
A
2203 color = inuse_list_head;
2204
2205 while( color != -1 ) {
2206 vm_page_t first, last;
2207 vm_page_t first_free;
2208
2209 first = (vm_page_t) queue_first(&free_list[color]);
2210 last = (vm_page_t) queue_last(&free_list[color]);
2211 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
55e303ae 2212
2d21ac55
A
2213 if (queue_empty(&vm_page_queue_free[color])) {
2214 queue_last(&vm_page_queue_free[color]) =
2215 (queue_entry_t) last;
2216 } else {
2217 queue_prev(&first_free->pageq) =
2218 (queue_entry_t) last;
2219 }
2220 queue_first(&vm_page_queue_free[color]) =
2221 (queue_entry_t) first;
2222 queue_prev(&first->pageq) =
2223 (queue_entry_t) &vm_page_queue_free[color];
2224 queue_next(&last->pageq) =
2225 (queue_entry_t) first_free;
2226 color = inuse[color];
2227 }
2228
55e303ae 2229 vm_page_free_count += pg_count;
2d21ac55
A
2230 avail_free_count = vm_page_free_count;
2231
2232 while ((vm_page_free_wanted_privileged > 0) && avail_free_count) {
2233 vm_page_free_wanted_privileged--;
2234 avail_free_count--;
2235
2236 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2237 }
55e303ae
A
2238
2239 if ((vm_page_free_wanted > 0) &&
2d21ac55 2240 (avail_free_count >= vm_page_free_reserved)) {
91447636 2241 unsigned int available_pages;
55e303ae 2242
2d21ac55
A
2243 if (avail_free_count >= vm_page_free_reserved) {
2244 available_pages = (avail_free_count - vm_page_free_reserved);
91447636
A
2245 } else {
2246 available_pages = 0;
2247 }
55e303ae
A
2248
2249 if (available_pages >= vm_page_free_wanted) {
2250 vm_page_free_wanted = 0;
2251 thread_wakeup((event_t) &vm_page_free_count);
2252 } else {
2253 while (available_pages--) {
2254 vm_page_free_wanted--;
2255 thread_wakeup_one((event_t) &vm_page_free_count);
2256 }
2257 }
2258 }
2259 mutex_unlock(&vm_page_queue_free_lock);
2d21ac55
A
2260
2261#if CONFIG_EMBEDDED
2262 {
2263 int percent_avail;
2264
2265 /*
2266 * Decide if we need to poke the memorystatus notification thread.
2267 */
2268 percent_avail =
2269 (vm_page_active_count + vm_page_inactive_count +
2270 vm_page_speculative_count + vm_page_free_count +
cf7d32b8 2271 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2d21ac55
A
2272 atop_64(max_mem);
2273 if (percent_avail >= (kern_memorystatus_level + 5)) {
2274 kern_memorystatus_level = percent_avail;
2275 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2276 }
2277 }
2278#endif
55e303ae
A
2279 }
2280}
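/*
 * Editor-added illustrative sketch: the page "color" used by
 * vm_page_release() and vm_page_free_list() above to pick a free
 * queue.  A page's color is simply the low-order bits of its physical
 * page number, which spreads consecutive physical pages across the
 * per-color free lists.  The helper is hypothetical.
 */
static inline unsigned int
vm_page_color_of(ppnum_t phys_page, unsigned int color_mask)
{
	/* color_mask is vm_color_mask, i.e. vm_colors - 1 (vm_colors is a power of two) */
	return phys_page & color_mask;
}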
2281
2282
1c79356b
A
2283/*
2284 * vm_page_wire:
2285 *
2286 * Mark this page as wired down by yet
2287 * another map, removing it from paging queues
2288 * as necessary.
2289 *
2290 * The page's object and the page queues must be locked.
2291 */
2292void
2293vm_page_wire(
2294 register vm_page_t mem)
2295{
2296
91447636 2297// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
1c79356b
A
2298
2299 VM_PAGE_CHECK(mem);
91447636
A
2300#if DEBUG
2301 if (mem->object)
2d21ac55 2302 vm_object_lock_assert_exclusive(mem->object);
91447636
A
2303 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2304#endif
1c79356b
A
2305 if (mem->wire_count == 0) {
2306 VM_PAGE_QUEUES_REMOVE(mem);
2307 if (!mem->private && !mem->fictitious && !mem->gobbled)
2308 vm_page_wire_count++;
2309 if (mem->gobbled)
2310 vm_page_gobble_count--;
2311 mem->gobbled = FALSE;
2d21ac55 2312 if (mem->zero_fill == TRUE) {
9bccf70c 2313 mem->zero_fill = FALSE;
2d21ac55 2314 OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
9bccf70c 2315 }
593a1d5f
A
2316#if CONFIG_EMBEDDED
2317 {
2318 int percent_avail;
2319
2320 /*
2321 * Decide if we need to poke the memorystatus notification thread.
2322 */
2323 percent_avail =
2324 (vm_page_active_count + vm_page_inactive_count +
2325 vm_page_speculative_count + vm_page_free_count +
2326 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2327 atop_64(max_mem);
2328 if (percent_avail <= (kern_memorystatus_level - 5)) {
2329 kern_memorystatus_level = percent_avail;
2330 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2331 }
2332 }
2333#endif
91447636
A
2334 /*
2335 * ENCRYPTED SWAP:
2336 * The page could be encrypted, but
2337 * We don't have to decrypt it here
2338 * because we don't guarantee that the
2339 * data is actually valid at this point.
2340 * The page will get decrypted in
2341 * vm_fault_wire() if needed.
2342 */
1c79356b
A
2343 }
2344 assert(!mem->gobbled);
2345 mem->wire_count++;
2346}
2347
2348/*
2349 * vm_page_gobble:
2350 *
2351 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2352 *
2353 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2354 */
2355void
2356vm_page_gobble(
2357 register vm_page_t mem)
2358{
2d21ac55 2359 vm_page_lockspin_queues();
1c79356b
A
2360 VM_PAGE_CHECK(mem);
2361
2362 assert(!mem->gobbled);
2363 assert(mem->wire_count == 0);
2364
2365 if (!mem->gobbled && mem->wire_count == 0) {
2366 if (!mem->private && !mem->fictitious)
2367 vm_page_wire_count++;
2368 }
2369 vm_page_gobble_count++;
2370 mem->gobbled = TRUE;
2371 vm_page_unlock_queues();
2372}
2373
2374/*
2375 * vm_page_unwire:
2376 *
2377 * Release one wiring of this page, potentially
2378 * enabling it to be paged again.
2379 *
2380 * The page's object and the page queues must be locked.
2381 */
2382void
2383vm_page_unwire(
2384 register vm_page_t mem)
2385{
2386
91447636 2387// dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
1c79356b
A
2388
2389 VM_PAGE_CHECK(mem);
2390 assert(mem->wire_count > 0);
91447636
A
2391#if DEBUG
2392 if (mem->object)
2d21ac55 2393 vm_object_lock_assert_exclusive(mem->object);
91447636
A
2394 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2395#endif
1c79356b
A
2396 if (--mem->wire_count == 0) {
2397 assert(!mem->private && !mem->fictitious);
2398 vm_page_wire_count--;
91447636
A
2399 assert(!mem->laundry);
2400 assert(mem->object != kernel_object);
2401 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
593a1d5f
A
2402 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2403 vm_page_deactivate(mem);
2d21ac55 2404 } else {
593a1d5f 2405 vm_page_activate(mem);
2d21ac55 2406 }
593a1d5f
A
2407#if CONFIG_EMBEDDED
2408 {
2409 int percent_avail;
2410
2411 /*
2412 * Decide if we need to poke the memorystatus notification thread.
2413 */
2414 percent_avail =
2415 (vm_page_active_count + vm_page_inactive_count +
2416 vm_page_speculative_count + vm_page_free_count +
2417 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2418 atop_64(max_mem);
2419 if (percent_avail >= (kern_memorystatus_level + 5)) {
2420 kern_memorystatus_level = percent_avail;
2421 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2422 }
2423 }
2424#endif
1c79356b
A
2425 }
2426}
2427
2d21ac55 2428
1c79356b
A
2429/*
2430 * vm_page_deactivate:
2431 *
2432 * Returns the given page to the inactive list,
2433 * indicating that no physical maps have access
2434 * to this page. [Used by the physical mapping system.]
2435 *
2436 * The page queues must be locked.
2437 */
2438void
2439vm_page_deactivate(
2440 register vm_page_t m)
2441{
2d21ac55
A
2442 boolean_t rapid_age = FALSE;
2443
1c79356b 2444 VM_PAGE_CHECK(m);
91447636 2445 assert(m->object != kernel_object);
2d21ac55 2446 assert(m->phys_page != vm_page_guard_addr);
1c79356b 2447
55e303ae 2448// dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
91447636
A
2449#if DEBUG
2450 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2451#endif
1c79356b
A
2452 /*
2453 * This page is no longer very interesting. If it was
2454 * interesting (active or inactive/referenced), then we
2455 * clear the reference bit and (re)enter it in the
2456 * inactive queue. Note wired pages should not have
2457 * their reference bit cleared.
2458 */
2459 if (m->gobbled) { /* can this happen? */
2460 assert(m->wire_count == 0);
2d21ac55 2461
1c79356b
A
2462 if (!m->private && !m->fictitious)
2463 vm_page_wire_count--;
2464 vm_page_gobble_count--;
2465 m->gobbled = FALSE;
2466 }
2467 if (m->private || (m->wire_count != 0))
2468 return;
2d21ac55
A
2469
2470 if (m->active && m->deactivated == TRUE) {
2471 if (!pmap_is_referenced(m->phys_page))
2472 rapid_age = TRUE;
1c79356b 2473 }
2d21ac55
A
2474 if (rapid_age == FALSE && !m->fictitious && !m->absent)
2475 pmap_clear_reference(m->phys_page);
2476
2477 m->reference = FALSE;
2478 m->deactivated = FALSE;
2479 m->no_cache = FALSE;
2480
2481 if (!m->inactive) {
2482 VM_PAGE_QUEUES_REMOVE(m);
0b4e3aa0 2483
91447636
A
2484 assert(!m->laundry);
2485 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2d21ac55
A
2486
2487 if (!IP_VALID(memory_manager_default) &&
2488 m->dirty && m->object->internal &&
2489 (m->object->purgable == VM_PURGABLE_DENY ||
cf7d32b8
A
2490 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2491 m->object->purgable == VM_PURGABLE_VOLATILE )) {
2d21ac55
A
2492 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2493 m->throttled = TRUE;
2494 vm_page_throttled_count++;
9bccf70c 2495 } else {
2d21ac55
A
2496 if (rapid_age == TRUE ||
2497 (!m->fictitious && m->object->named && m->object->ref_count == 1)) {
2498 vm_page_speculate(m, FALSE);
2499 vm_page_speculative_recreated++;
2500 return;
2501 } else {
2502 if (m->zero_fill) {
2503 queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
2504 vm_zf_queue_count++;
2505 } else {
2506 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2507 }
2508 }
2509 m->inactive = TRUE;
2510 if (!m->fictitious) {
2511 vm_page_inactive_count++;
2512 token_new_pagecount++;
2513 }
9bccf70c 2514 }
1c79356b
A
2515 }
2516}
2517
2518/*
2519 * vm_page_activate:
2520 *
2521 * Put the specified page on the active list (if appropriate).
2522 *
2523 * The page queues must be locked.
2524 */
2525
2526void
2527vm_page_activate(
2528 register vm_page_t m)
2529{
2530 VM_PAGE_CHECK(m);
2d21ac55 2531#ifdef FIXME_4778297
91447636 2532 assert(m->object != kernel_object);
2d21ac55
A
2533#endif
2534 assert(m->phys_page != vm_page_guard_addr);
91447636
A
2535#if DEBUG
2536 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2537#endif
1c79356b
A
2538 if (m->gobbled) {
2539 assert(m->wire_count == 0);
2540 if (!m->private && !m->fictitious)
2541 vm_page_wire_count--;
2542 vm_page_gobble_count--;
2543 m->gobbled = FALSE;
2544 }
2545 if (m->private)
2546 return;
2547
2d21ac55
A
2548#if DEBUG
2549 if (m->active)
2550 panic("vm_page_activate: already active");
2551#endif
2552
2553 if (m->speculative) {
2554 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2555 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2556 }
2557
2558 VM_PAGE_QUEUES_REMOVE(m);
2559
2560 if (m->wire_count == 0) {
91447636 2561 assert(!m->laundry);
2d21ac55
A
2562 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2563 if (!IP_VALID(memory_manager_default) &&
2564 !m->fictitious && m->dirty && m->object->internal &&
2565 (m->object->purgable == VM_PURGABLE_DENY ||
cf7d32b8
A
2566 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2567 m->object->purgable == VM_PURGABLE_VOLATILE )) {
2d21ac55
A
2568 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2569 m->throttled = TRUE;
2570 vm_page_throttled_count++;
9bccf70c 2571 } else {
2d21ac55
A
2572 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2573 m->active = TRUE;
2574 if (!m->fictitious)
2575 vm_page_active_count++;
9bccf70c 2576 }
2d21ac55
A
2577 m->reference = TRUE;
2578 m->no_cache = FALSE;
1c79356b 2579 }
2d21ac55
A
2580}
2581
2582
2583/*
2584 * vm_page_speculate:
2585 *
2586 * Put the specified page on the speculative list (if appropriate).
2587 *
2588 * The page queues must be locked.
2589 */
2590void
2591vm_page_speculate(
2592 vm_page_t m,
2593 boolean_t new)
2594{
2595 struct vm_speculative_age_q *aq;
2596
2597 VM_PAGE_CHECK(m);
2598 assert(m->object != kernel_object);
2599 assert(!m->speculative && !m->active && !m->inactive && !m->throttled);
2600 assert(m->phys_page != vm_page_guard_addr);
2601 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
91447636 2602#if DEBUG
2d21ac55 2603 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
91447636 2604#endif
2d21ac55
A
2605 if (m->wire_count == 0) {
2606 mach_timespec_t ts;
2607
2608 clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec);
2609
2610 if (vm_page_speculative_count == 0) {
2611
2612 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2613 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2614
2615 aq = &vm_page_queue_speculative[speculative_age_index];
2616
2617 /*
2618 * set the timer to begin a new group
2619 */
2620 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2621 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2622
2623 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2624 } else {
2625 aq = &vm_page_queue_speculative[speculative_age_index];
2626
2627 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2628
2629 speculative_age_index++;
2630
2631 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2632 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2633 if (speculative_age_index == speculative_steal_index) {
2634 speculative_steal_index = speculative_age_index + 1;
2635
2636 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2637 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2638 }
2639 aq = &vm_page_queue_speculative[speculative_age_index];
2640
2641 if (!queue_empty(&aq->age_q))
2642 vm_page_speculate_ageit(aq);
2643
2644 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2645 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2646
2647 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2648 }
2649 }
2650 enqueue_tail(&aq->age_q, &m->pageq);
2651 m->speculative = TRUE;
2652 vm_page_speculative_count++;
2653
2654 if (new == TRUE) {
2655 m->object->pages_created++;
2656 vm_page_speculative_created++;
2657 }
2658 }
2659}
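/*
 * Editor-added illustrative sketch: the millisecond-to-mach_timespec
 * conversion vm_page_speculate() performs above when (re)arming an
 * aging bin.  The helper is hypothetical; the arithmetic matches the
 * two assignments to aq->age_ts.
 */
static inline void
speculative_age_ts_from_ms(mach_timespec_t *ts, uint32_t ms)
{
	ts->tv_sec  = ms / 1000;				/* whole seconds */
	ts->tv_nsec = (ms % 1000) * 1000 * NSEC_PER_USEC;	/* leftover ms as ns */
}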
2660
2661
2662/*
2663 * move pages from the specified aging bin to
2664 * the speculative bin that pageout_scan claims from
2665 *
2666 * The page queues must be locked.
2667 */
2668void
2669vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2670{
2671 struct vm_speculative_age_q *sq;
2672 vm_page_t t;
2673
2674 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2675
2676 if (queue_empty(&sq->age_q)) {
2677 sq->age_q.next = aq->age_q.next;
2678 sq->age_q.prev = aq->age_q.prev;
2679
2680 t = (vm_page_t)sq->age_q.next;
2681 t->pageq.prev = &sq->age_q;
2682
2683 t = (vm_page_t)sq->age_q.prev;
2684 t->pageq.next = &sq->age_q;
2685 } else {
2686 t = (vm_page_t)sq->age_q.prev;
2687 t->pageq.next = aq->age_q.next;
2688
2689 t = (vm_page_t)aq->age_q.next;
2690 t->pageq.prev = sq->age_q.prev;
2691
2692 t = (vm_page_t)aq->age_q.prev;
2693 t->pageq.next = &sq->age_q;
2694
2695 sq->age_q.prev = aq->age_q.prev;
1c79356b 2696 }
2d21ac55
A
2697 queue_init(&aq->age_q);
2698}
2699
2700
2701void
2702vm_page_lru(
2703 vm_page_t m)
2704{
2705 VM_PAGE_CHECK(m);
2706 assert(m->object != kernel_object);
2707 assert(m->phys_page != vm_page_guard_addr);
2708
2709#if DEBUG
2710 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
2711#endif
2712 if (m->active || m->reference)
2713 return;
2714
2715 if (m->private || (m->wire_count != 0))
2716 return;
2717
2718 m->no_cache = FALSE;
2719
2720 VM_PAGE_QUEUES_REMOVE(m);
2721
2722 assert(!m->laundry);
2723 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2724
2725 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2726 m->inactive = TRUE;
2727
2728 vm_page_inactive_count++;
2729 token_new_pagecount++;
1c79356b
A
2730}
2731
2d21ac55 2732
1c79356b
A
2733/*
2734 * vm_page_part_zero_fill:
2735 *
2736 * Zero-fill a part of the page.
2737 */
2738void
2739vm_page_part_zero_fill(
2740 vm_page_t m,
2741 vm_offset_t m_pa,
2742 vm_size_t len)
2743{
2744 vm_page_t tmp;
2745
2746 VM_PAGE_CHECK(m);
2747#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
55e303ae 2748 pmap_zero_part_page(m->phys_page, m_pa, len);
1c79356b
A
2749#else
2750 while (1) {
2751 tmp = vm_page_grab();
2752 if (tmp == VM_PAGE_NULL) {
2753 vm_page_wait(THREAD_UNINT);
2754 continue;
2755 }
2756 break;
2757 }
2758 vm_page_zero_fill(tmp);
2759 if(m_pa != 0) {
2760 vm_page_part_copy(m, 0, tmp, 0, m_pa);
2761 }
2762 if((m_pa + len) < PAGE_SIZE) {
2763 vm_page_part_copy(m, m_pa + len, tmp,
2764 m_pa + len, PAGE_SIZE - (m_pa + len));
2765 }
2766 vm_page_copy(tmp,m);
2767 vm_page_lock_queues();
2768 vm_page_free(tmp);
2769 vm_page_unlock_queues();
2770#endif
2771
2772}
2773
2774/*
2775 * vm_page_zero_fill:
2776 *
2777 * Zero-fill the specified page.
2778 */
2779void
2780vm_page_zero_fill(
2781 vm_page_t m)
2782{
2783 XPR(XPR_VM_PAGE,
2784 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
2785 (integer_t)m->object, (integer_t)m->offset, (integer_t)m, 0,0);
2786
2787 VM_PAGE_CHECK(m);
2788
55e303ae
A
2789// dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
2790 pmap_zero_page(m->phys_page);
1c79356b
A
2791}
2792
2793/*
2794 * vm_page_part_copy:
2795 *
2796 * copy part of one page to another
2797 */
2798
2799void
2800vm_page_part_copy(
2801 vm_page_t src_m,
2802 vm_offset_t src_pa,
2803 vm_page_t dst_m,
2804 vm_offset_t dst_pa,
2805 vm_size_t len)
2806{
2807 VM_PAGE_CHECK(src_m);
2808 VM_PAGE_CHECK(dst_m);
2809
55e303ae
A
2810 pmap_copy_part_page(src_m->phys_page, src_pa,
2811 dst_m->phys_page, dst_pa, len);
1c79356b
A
2812}
2813
2814/*
2815 * vm_page_copy:
2816 *
2817 * Copy one page to another
91447636
A
2818 *
2819 * ENCRYPTED SWAP:
2820 * The source page should not be encrypted. The caller should
2821 * make sure the page is decrypted first, if necessary.
1c79356b
A
2822 */
2823
2d21ac55
A
2824int vm_page_copy_cs_validations = 0;
2825int vm_page_copy_cs_tainted = 0;
2826
1c79356b
A
2827void
2828vm_page_copy(
2829 vm_page_t src_m,
2830 vm_page_t dest_m)
2831{
2832 XPR(XPR_VM_PAGE,
2833 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
2834 (integer_t)src_m->object, src_m->offset,
2835 (integer_t)dest_m->object, dest_m->offset,
2836 0);
2837
2838 VM_PAGE_CHECK(src_m);
2839 VM_PAGE_CHECK(dest_m);
2840
91447636
A
2841 /*
2842 * ENCRYPTED SWAP:
2843 * The source page should not be encrypted at this point.
2844 * The destination page will therefore not contain encrypted
2845 * data after the copy.
2846 */
2847 if (src_m->encrypted) {
2848 panic("vm_page_copy: source page %p is encrypted\n", src_m);
2849 }
2850 dest_m->encrypted = FALSE;
2851
2d21ac55 2852 if (src_m->object != VM_OBJECT_NULL &&
4a3eedf9 2853 src_m->object->code_signed) {
2d21ac55 2854 /*
4a3eedf9 2855 * We're copying a page from a code-signed object.
2d21ac55
A
2856 * Whoever ends up mapping the copy page might care about
2857 * the original page's integrity, so let's validate the
2858 * source page now.
2859 */
2860 vm_page_copy_cs_validations++;
2861 vm_page_validate_cs(src_m);
2862 }
2863 /*
2864 * Propagate the code-signing bits to the copy page.
2865 */
2866 dest_m->cs_validated = src_m->cs_validated;
2867 dest_m->cs_tainted = src_m->cs_tainted;
2868 if (dest_m->cs_tainted) {
2869 assert(dest_m->cs_validated);
2870 vm_page_copy_cs_tainted++;
2871 }
2872
55e303ae 2873 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
1c79356b
A
2874}
2875
2d21ac55 2876#if MACH_ASSERT
1c79356b
A
2877/*
2878 * Check that the list of pages is ordered by
2879 * ascending physical address and has no holes.
2880 */
2d21ac55 2881static int
1c79356b
A
2882vm_page_verify_contiguous(
2883 vm_page_t pages,
2884 unsigned int npages)
2885{
2886 register vm_page_t m;
2887 unsigned int page_count;
91447636 2888 vm_offset_t prev_addr;
1c79356b 2889
55e303ae 2890 prev_addr = pages->phys_page;
1c79356b
A
2891 page_count = 1;
2892 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
55e303ae 2893 if (m->phys_page != prev_addr + 1) {
2d21ac55 2894 printf("m %p prev_addr 0x%x, current addr 0x%x\n",
55e303ae 2895 m, prev_addr, m->phys_page);
2d21ac55 2896 printf("pages %p page_count %d\n", pages, page_count);
1c79356b
A
2897 panic("vm_page_verify_contiguous: not contiguous!");
2898 }
55e303ae 2899 prev_addr = m->phys_page;
1c79356b
A
2900 ++page_count;
2901 }
2902 if (page_count != npages) {
2d21ac55 2903 printf("pages %p actual count 0x%x but requested 0x%x\n",
1c79356b
A
2904 pages, page_count, npages);
2905 panic("vm_page_verify_contiguous: count error");
2906 }
2907 return 1;
2908}
2909#endif /* MACH_ASSERT */
2910
2911
2d21ac55
A
2912#if MACH_ASSERT
2913/*
2914 * Check the free lists for proper length etc.
2915 */
2916static void
2917vm_page_verify_free_lists( void )
2918{
2919 unsigned int color, npages;
2920 vm_page_t m;
2921 vm_page_t prev_m;
2922
2923 npages = 0;
2924
2925 mutex_lock(&vm_page_queue_free_lock);
2926
2927 for( color = 0; color < vm_colors; color++ ) {
2928 prev_m = (vm_page_t) &vm_page_queue_free[color];
2929 queue_iterate(&vm_page_queue_free[color],
2930 m,
2931 vm_page_t,
2932 pageq) {
2933 if ((vm_page_t) m->pageq.prev != prev_m)
2934 panic("vm_page_verify_free_lists: corrupted prev ptr");
2935 if ( ! m->free )
2936 panic("vm_page_verify_free_lists: not free");
2937 if ( ! m->busy )
2938 panic("vm_page_verify_free_lists: not busy");
2939 if ( (m->phys_page & vm_color_mask) != color)
2940 panic("vm_page_verify_free_lists: wrong color");
2941 ++npages;
2942 prev_m = m;
2943 }
2944 }
2945 if (npages != vm_page_free_count)
2946 panic("vm_page_verify_free_lists: npages %u free_count %d",
2947 npages, vm_page_free_count);
2948
2949 mutex_unlock(&vm_page_queue_free_lock);
2950}
2951#endif /* MACH_ASSERT */
2952
2953
91447636 2954
1c79356b 2955/*
2d21ac55
A
2956 * CONTIGUOUS PAGE ALLOCATION
2957 * Additional levels of effort:
2958 * + consider pages that are currently 'pmapped'
2959 * this could be expensive since we'd have
 2960 * to ask the pmap layer about their state
2961 * + consider dirty pages
2962 * either clean them or
2963 * copy them to other locations...
2964 *
2965 * Find a region large enough to contain at least n pages
1c79356b
A
2966 * of contiguous physical memory.
2967 *
2d21ac55
A
2968 * This is done by traversing the vm_page_t array in a linear fashion
 2969 * we assume that the vm_page_t array has the available physical pages in an
2970 * ordered, ascending list... this is currently true of all our implementations
2971 * and must remain so... there can be 'holes' in the array... we also can
2972 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
 2973 * which used to happen via 'vm_page_convert'... that function was no longer
2974 * being called and was removed...
2975 *
2976 * The basic flow consists of stabilizing some of the interesting state of
2977 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 2978 * sweep at the beginning of the array looking for pages that meet our criteria
 2979 * for a 'stealable' page... currently we are pretty conservative... if the page
 2980 * meets these criteria and is physically contiguous to the previous page in the 'run'
2981 * we keep developing it. If we hit a page that doesn't fit, we reset our state
2982 * and start to develop a new run... if at this point we've already considered
2983 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 2984 * and mutex_pause (which will yield the processor), to keep the latency low with respect
2985 * to other threads trying to acquire free pages (or move pages from q to q),
2986 * and then continue from the spot we left off... we only make 1 pass through the
 2987 * array. Once we have a 'run' that is long enough, we'll go into the loop
 2988 * which steals the pages from the queues they're currently on... pages on the free
2989 * queue can be stolen directly... pages that are on any of the other queues
2990 * must be removed from the object they are tabled on... this requires taking the
2991 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
2992 * or if the state of the page behind the vm_object lock is no longer viable, we'll
2993 * dump the pages we've currently stolen back to the free list, and pick up our
2994 * scan from the point where we aborted the 'current' run.
2995 *
2996 *
1c79356b 2997 * Requirements:
2d21ac55 2998 * - neither vm_page_queue nor vm_free_list lock can be held on entry
1c79356b 2999 *
2d21ac55 3000 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
1c79356b 3001 *
e5568f75 3002 * Algorithm:
1c79356b 3003 */
2d21ac55
A
3004
3005#define MAX_CONSIDERED_BEFORE_YIELD 1000
3006
3007
3008#define RESET_STATE_OF_RUN() \
3009 MACRO_BEGIN \
3010 prevcontaddr = -2; \
3011 free_considered = 0; \
3012 substitute_needed = 0; \
3013 npages = 0; \
3014 MACRO_END
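/*
 * Editor-added illustrative sketch: the run-extension test at the
 * heart of vm_page_find_contiguous() below, reduced to the physical
 * adjacency check.  RESET_STATE_OF_RUN() seeds prevcontaddr with -2,
 * so the first page examined always starts a fresh run.  The helper
 * is hypothetical and only restates the logic of the real scan.
 */
static inline unsigned int
contig_run_extend(ppnum_t phys_page, ppnum_t *prevcontaddr, unsigned int npages)
{
	if (phys_page != *prevcontaddr + 1)
		npages = 1;		/* not adjacent to the current run: start over here */
	else
		npages++;		/* adjacent: the run grows by one page */
	*prevcontaddr = phys_page;
	return npages;
}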
3015
3016
1c79356b
A
3017static vm_page_t
3018vm_page_find_contiguous(
2d21ac55
A
3019 unsigned int contig_pages,
3020 ppnum_t max_pnum,
3021 boolean_t wire)
1c79356b 3022{
2d21ac55 3023 vm_page_t m = NULL;
e5568f75 3024 ppnum_t prevcontaddr;
2d21ac55
A
3025 unsigned int npages, considered;
3026 unsigned int page_idx, start_idx;
3027 int free_considered, free_available;
3028 int substitute_needed;
593a1d5f 3029#if DEBUG
2d21ac55 3030 uint32_t tv_start_sec, tv_start_usec, tv_end_sec, tv_end_usec;
593a1d5f
A
3031#endif
3032#if MACH_ASSERT
2d21ac55
A
3033 int yielded = 0;
3034 int dumped_run = 0;
3035 int stolen_pages = 0;
91447636 3036#endif
1c79356b 3037
2d21ac55 3038 if (contig_pages == 0)
1c79356b
A
3039 return VM_PAGE_NULL;
3040
2d21ac55
A
3041#if MACH_ASSERT
3042 vm_page_verify_free_lists();
593a1d5f
A
3043#endif
3044#if DEBUG
2d21ac55
A
3045 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3046#endif
3047 vm_page_lock_queues();
3048 mutex_lock(&vm_page_queue_free_lock);
3049
3050 RESET_STATE_OF_RUN();
1c79356b 3051
2d21ac55
A
3052 considered = 0;
3053 free_available = vm_page_free_count - vm_page_free_reserved;
e5568f75 3054
2d21ac55
A
3055 for (page_idx = 0, start_idx = 0;
3056 npages < contig_pages && page_idx < vm_pages_count;
3057 page_idx++) {
3058retry:
3059 m = &vm_pages[page_idx];
e5568f75 3060
2d21ac55
A
3061 if (max_pnum && m->phys_page > max_pnum) {
3062 /* no more low pages... */
3063 break;
e5568f75 3064 }
2d21ac55
A
3065 if (m->phys_page <= vm_lopage_poolend &&
3066 m->phys_page >= vm_lopage_poolstart) {
3067 /*
3068 * don't want to take pages from our
3069 * reserved pool of low memory
3070 * so don't consider it which
3071 * means starting a new run
3072 */
3073 RESET_STATE_OF_RUN();
e5568f75 3074
2d21ac55
A
3075 } else if (m->wire_count || m->gobbled ||
3076 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3077 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3078 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending) {
3079 /*
3080 * page is in a transient state
3081 * or a state we don't want to deal
3082 * with, so don't consider it which
3083 * means starting a new run
3084 */
3085 RESET_STATE_OF_RUN();
1c79356b 3086
2d21ac55
A
3087 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3088 /*
3089 * page needs to be on one of our queues
3090 * in order for it to be stable behind the
3091 * locks we hold at this point...
3092 * if not, don't consider it which
3093 * means starting a new run
3094 */
3095 RESET_STATE_OF_RUN();
3096
3097 } else if (!m->free && (!m->tabled || m->busy)) {
3098 /*
3099 * pages on the free list are always 'busy'
3100 * so we couldn't test for 'busy' in the check
3101 * for the transient states... pages that are
3102 * 'free' are never 'tabled', so we also couldn't
3103 * test for 'tabled'. So we check here to make
3104 * sure that a non-free page is not busy and is
3105 * tabled on an object...
3106 * if not, don't consider it which
3107 * means starting a new run
3108 */
3109 RESET_STATE_OF_RUN();
3110
3111 } else {
3112 if (m->phys_page != prevcontaddr + 1) {
e5568f75 3113 npages = 1;
2d21ac55
A
3114 start_idx = page_idx;
3115 } else {
3116 npages++;
e5568f75 3117 }
2d21ac55
A
3118 prevcontaddr = m->phys_page;
3119
3120 if (m->pmapped || m->dirty)
3121 substitute_needed++;
3122
3123 if (m->free) {
3124 free_considered++;
3125 }
3126 if ((free_considered + substitute_needed) > free_available) {
3127 /*
3128 * if we let this run continue
3129 * we will end up dropping the vm_page_free_count
3130 * below the reserve limit... we need to abort
3131 * this run, but we can at least re-consider this
3132 * page... thus the jump back to 'retry'
3133 */
3134 RESET_STATE_OF_RUN();
3135
3136 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3137 considered++;
3138 goto retry;
e5568f75 3139 }
2d21ac55
A
3140 /*
3141 * free_available == 0
3142 * so can't consider any free pages... if
3143 * we went to retry in this case, we'd
3144 * get stuck looking at the same page
3145 * w/o making any forward progress
3146 * we also want to take this path if we've already
3147 * reached our limit that controls the lock latency
3148 */
e5568f75 3149 }
2d21ac55
A
3150 }
3151 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3152
3153 mutex_unlock(&vm_page_queue_free_lock);
3154 vm_page_unlock_queues();
e5568f75 3155
2d21ac55
A
3156 mutex_pause(0);
3157
3158 vm_page_lock_queues();
3159 mutex_lock(&vm_page_queue_free_lock);
3160
3161 RESET_STATE_OF_RUN();
1c79356b 3162 /*
2d21ac55
A
3163 * reset our free page limit since we
3164 * dropped the lock protecting the vm_page_free_queue
1c79356b 3165 */
2d21ac55
A
3166 free_available = vm_page_free_count - vm_page_free_reserved;
3167 considered = 0;
3168#if MACH_ASSERT
3169 yielded++;
3170#endif
3171 goto retry;
3172 }
3173 considered++;
3174 }
3175 m = VM_PAGE_NULL;
3176
3177 if (npages != contig_pages)
3178 mutex_unlock(&vm_page_queue_free_lock);
3179 else {
3180 vm_page_t m1;
3181 vm_page_t m2;
3182 unsigned int cur_idx;
3183 unsigned int tmp_start_idx;
3184 vm_object_t locked_object = VM_OBJECT_NULL;
3185 boolean_t abort_run = FALSE;
3186
3187 tmp_start_idx = start_idx;
3188
3189 /*
3190 * first pass through to pull the free pages
3191 * off of the free queue so that in case we
3192 * need substitute pages, we won't grab any
3193 * of the free pages in the run... we'll clear
3194 * the 'free' bit in the 2nd pass, and even in
3195 * an abort_run case, we'll collect all of the
3196 * free pages in this run and return them to the free list
3197 */
3198 while (start_idx < page_idx) {
3199
3200 m1 = &vm_pages[start_idx++];
3201
3202 if (m1->free) {
3203 unsigned int color;
3204
3205 color = m1->phys_page & vm_color_mask;
3206 queue_remove(&vm_page_queue_free[color],
3207 m1,
3208 vm_page_t,
3209 pageq);
3210
3211 vm_page_free_count--;
3212 }
3213 }
3214 /*
3215 * adjust global freelist counts
3216 */
3217 if (vm_page_free_count < vm_page_free_count_minimum)
3218 vm_page_free_count_minimum = vm_page_free_count;
3219
3220 /*
3221 * we can drop the free queue lock at this point since
3222 * we've pulled any 'free' candidates off of the list
3223 * we need it dropped so that we can do a vm_page_grab
 3224 * when substituting for pmapped/dirty pages
3225 */
3226 mutex_unlock(&vm_page_queue_free_lock);
3227
3228 start_idx = tmp_start_idx;
3229 cur_idx = page_idx - 1;
3230
3231 while (start_idx++ < page_idx) {
3232 /*
3233 * must go through the list from back to front
3234 * so that the page list is created in the
3235 * correct order - low -> high phys addresses
3236 */
3237 m1 = &vm_pages[cur_idx--];
3238
3239 if (m1->free) {
3240 /*
3241 * pages have already been removed from
3242 * the free list in the 1st pass
3243 */
3244 assert(m1->free);
3245 assert(m1->busy);
3246 assert(!m1->wanted);
3247 assert(!m1->laundry);
3248 m1->free = FALSE;
e5568f75 3249
e5568f75 3250 } else {
2d21ac55
A
3251 vm_object_t object;
3252
3253 if (abort_run == TRUE)
3254 continue;
3255
3256 object = m1->object;
3257
3258 if (object != locked_object) {
3259 if (locked_object) {
3260 vm_object_unlock(locked_object);
3261 locked_object = VM_OBJECT_NULL;
3262 }
3263 if (vm_object_lock_try(object))
3264 locked_object = object;
3265 }
3266 if (locked_object == VM_OBJECT_NULL ||
3267 (m1->wire_count || m1->gobbled ||
3268 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3269 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3270 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3271
3272 if (locked_object) {
3273 vm_object_unlock(locked_object);
3274 locked_object = VM_OBJECT_NULL;
3275 }
3276 tmp_start_idx = cur_idx;
3277 abort_run = TRUE;
3278 continue;
3279 }
3280 if (m1->pmapped || m1->dirty) {
3281 int refmod;
3282 vm_object_offset_t offset;
3283
3284 m2 = vm_page_grab();
3285
3286 if (m2 == VM_PAGE_NULL) {
3287 if (locked_object) {
3288 vm_object_unlock(locked_object);
3289 locked_object = VM_OBJECT_NULL;
3290 }
3291 tmp_start_idx = cur_idx;
3292 abort_run = TRUE;
3293 continue;
3294 }
3295 if (m1->pmapped)
3296 refmod = pmap_disconnect(m1->phys_page);
3297 else
3298 refmod = 0;
3299 vm_page_copy(m1, m2);
3300
3301 m2->reference = m1->reference;
3302 m2->dirty = m1->dirty;
3303
3304 if (refmod & VM_MEM_REFERENCED)
3305 m2->reference = TRUE;
3306 if (refmod & VM_MEM_MODIFIED)
3307 m2->dirty = TRUE;
3308 offset = m1->offset;
3309
3310 /*
3311 * completely cleans up the state
3312 * of the page so that it is ready
 3313 * to be put onto the free list; for
 3314 * this purpose it looks as if it
3315 * just came off of the free list
3316 */
3317 vm_page_free_prepare(m1);
3318
3319 /*
3320 * make sure we clear the ref/mod state
3321 * from the pmap layer... else we risk
3322 * inheriting state from the last time
3323 * this page was used...
3324 */
3325 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
3326 /*
3327 * now put the substitute page on the object
3328 */
3329 vm_page_insert_internal(m2, locked_object, offset, TRUE);
3330
3331 if (m2->reference)
3332 vm_page_activate(m2);
3333 else
3334 vm_page_deactivate(m2);
3335
3336 PAGE_WAKEUP_DONE(m2);
3337
3338 } else {
3339 /*
3340 * completely cleans up the state
3341 * of the page so that it is ready
 3342 * to be put onto the free list; for
 3343 * this purpose it looks as if it
3344 * just came off of the free list
3345 */
3346 vm_page_free_prepare(m1);
3347 }
3348#if MACH_ASSERT
3349 stolen_pages++;
3350#endif
1c79356b 3351 }
2d21ac55
A
3352 m1->pageq.next = (queue_entry_t) m;
3353 m1->pageq.prev = NULL;
3354 m = m1;
e5568f75 3355 }
2d21ac55
A
3356 if (locked_object) {
3357 vm_object_unlock(locked_object);
3358 locked_object = VM_OBJECT_NULL;
1c79356b
A
3359 }
3360
2d21ac55
A
3361 if (abort_run == TRUE) {
3362 if (m != VM_PAGE_NULL) {
3363 vm_page_free_list(m);
3364 }
3365#if MACH_ASSERT
3366 dumped_run++;
3367#endif
3368 /*
3369 * want the index of the last
3370 * page in this run that was
3371 * successfully 'stolen', so back
3372 * it up 1 for the auto-decrement on use
3373 * and 1 more to bump back over this page
3374 */
3375 page_idx = tmp_start_idx + 2;
e5568f75 3376
2d21ac55
A
3377 if (page_idx >= vm_pages_count)
3378 goto done_scanning;
3379
3380 mutex_lock(&vm_page_queue_free_lock);
3381
3382 RESET_STATE_OF_RUN();
3383
3384 /*
3385 * reset our free page limit since we
3386 * dropped the lock protecting the vm_page_free_queue
3387 */
3388 free_available = vm_page_free_count - vm_page_free_reserved;
3389
3390 goto retry;
3391 }
e5568f75 3392
e5568f75 3393 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
2d21ac55
A
3394
3395 if (wire == TRUE)
3396 m1->wire_count++;
3397 else
3398 m1->gobbled = TRUE;
e5568f75 3399 }
2d21ac55
A
3400 if (wire == FALSE)
3401 vm_page_gobble_count += npages;
3402
3403 /*
3404 * gobbled pages are also counted as wired pages
3405 */
e5568f75 3406 vm_page_wire_count += npages;
e5568f75 3407
2d21ac55
A
3408 assert(vm_page_verify_contiguous(m, npages));
3409 }
3410done_scanning:
3411 vm_page_unlock_queues();
3412
593a1d5f 3413#if DEBUG
2d21ac55
A
3414 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
3415
3416 tv_end_sec -= tv_start_sec;
3417 if (tv_end_usec < tv_start_usec) {
3418 tv_end_sec--;
3419 tv_end_usec += 1000000;
1c79356b 3420 }
2d21ac55
A
3421 tv_end_usec -= tv_start_usec;
3422 if (tv_end_usec >= 1000000) {
3423 tv_end_sec++;
 3424 tv_end_usec -= 1000000;
3425 }
3426 printf("vm_find_page_contiguous(num=%d,low=%d): found %d pages in %d.%06ds... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
3427 contig_pages, max_pnum, npages, tv_end_sec, tv_end_usec, page_idx, yielded, dumped_run, stolen_pages);
e5568f75 3428
593a1d5f
A
3429#endif
3430#if MACH_ASSERT
2d21ac55
A
3431 vm_page_verify_free_lists();
3432#endif
e5568f75 3433 return m;
1c79356b
A
3434}
3435
3436/*
3437 * Allocate a list of contiguous, wired pages.
3438 */
3439kern_return_t
3440cpm_allocate(
3441 vm_size_t size,
3442 vm_page_t *list,
2d21ac55 3443 ppnum_t max_pnum,
1c79356b
A
3444 boolean_t wire)
3445{
91447636
A
3446 vm_page_t pages;
3447 unsigned int npages;
1c79356b
A
3448
3449 if (size % page_size != 0)
3450 return KERN_INVALID_ARGUMENT;
3451
e5568f75 3452 npages = size / page_size;
1c79356b 3453
1c79356b
A
3454 /*
3455 * Obtain a pointer to a subset of the free
3456 * list large enough to satisfy the request;
3457 * the region will be physically contiguous.
3458 */
2d21ac55 3459 pages = vm_page_find_contiguous(npages, max_pnum, wire);
e5568f75 3460
2d21ac55 3461 if (pages == VM_PAGE_NULL)
1c79356b 3462 return KERN_NO_SPACE;
1c79356b 3463 /*
2d21ac55 3464 * determine need for wakeups
1c79356b 3465 */
2d21ac55
A
3466 if ((vm_page_free_count < vm_page_free_min) ||
3467 ((vm_page_free_count < vm_page_free_target) &&
3468 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
e5568f75 3469 thread_wakeup((event_t) &vm_page_free_wanted);
2d21ac55
A
3470
3471#if CONFIG_EMBEDDED
3472 {
3473 int percent_avail;
e5568f75 3474
2d21ac55
A
3475 /*
3476 * Decide if we need to poke the memorystatus notification thread.
3477 */
3478 percent_avail =
3479 (vm_page_active_count + vm_page_inactive_count +
3480 vm_page_speculative_count + vm_page_free_count +
cf7d32b8 3481 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2d21ac55
A
3482 atop_64(max_mem);
3483 if (percent_avail <= (kern_memorystatus_level - 5)) {
3484 kern_memorystatus_level = percent_avail;
3485 thread_wakeup((event_t)&kern_memorystatus_wakeup);
3486 }
3487 }
3488#endif
1c79356b
A
3489 /*
3490 * The CPM pages should now be available and
3491 * ordered by ascending physical address.
3492 */
3493 assert(vm_page_verify_contiguous(pages, npages));
3494
3495 *list = pages;
3496 return KERN_SUCCESS;
3497}
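/*
 * Editor-added usage sketch (hypothetical caller, not from the
 * original source): requesting a physically contiguous, wired 64KB
 * run whose pages all lie below 4GB.  The max_pnum argument bounds
 * the highest physical page number the allocator may return.
 */
static kern_return_t
cpm_example_grab_64k(vm_page_t *pages)
{
	vm_size_t	size = 16 * PAGE_SIZE;				/* 64KB with 4KB pages */
	ppnum_t		max_pnum = (ppnum_t)atop_64(0x100000000ULL) - 1;	/* last page below 4GB */

	return cpm_allocate(size, pages, max_pnum, TRUE);		/* TRUE: wire the pages */
}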
2d21ac55 3498
1c79356b
A
3499
3500#include <mach_vm_debug.h>
3501#if MACH_VM_DEBUG
3502
3503#include <mach_debug/hash_info.h>
3504#include <vm/vm_debug.h>
3505
3506/*
3507 * Routine: vm_page_info
3508 * Purpose:
3509 * Return information about the global VP table.
3510 * Fills the buffer with as much information as possible
3511 * and returns the desired size of the buffer.
3512 * Conditions:
3513 * Nothing locked. The caller should provide
3514 * possibly-pageable memory.
3515 */
3516
3517unsigned int
3518vm_page_info(
3519 hash_info_bucket_t *info,
3520 unsigned int count)
3521{
91447636 3522 unsigned int i;
1c79356b
A
3523
3524 if (vm_page_bucket_count < count)
3525 count = vm_page_bucket_count;
3526
3527 for (i = 0; i < count; i++) {
3528 vm_page_bucket_t *bucket = &vm_page_buckets[i];
3529 unsigned int bucket_count = 0;
3530 vm_page_t m;
3531
3532 simple_lock(&vm_page_bucket_lock);
3533 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
3534 bucket_count++;
3535 simple_unlock(&vm_page_bucket_lock);
3536
3537 /* don't touch pageable memory while holding locks */
3538 info[i].hib_count = bucket_count;
3539 }
3540
3541 return vm_page_bucket_count;
3542}
3543#endif /* MACH_VM_DEBUG */
3544
3545#include <mach_kdb.h>
3546#if MACH_KDB
3547
3548#include <ddb/db_output.h>
3549#include <vm/vm_print.h>
3550#define printf kdbprintf
3551
3552/*
3553 * Routine: vm_page_print [exported]
3554 */
3555void
3556vm_page_print(
91447636 3557 db_addr_t db_addr)
1c79356b 3558{
91447636
A
3559 vm_page_t p;
3560
3561 p = (vm_page_t) (long) db_addr;
1c79356b
A
3562
3563 iprintf("page 0x%x\n", p);
3564
3565 db_indent += 2;
3566
3567 iprintf("object=0x%x", p->object);
3568 printf(", offset=0x%x", p->offset);
3569 printf(", wire_count=%d", p->wire_count);
1c79356b 3570
2d21ac55 3571 iprintf("%sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
1c79356b
A
3572 (p->inactive ? "" : "!"),
3573 (p->active ? "" : "!"),
2d21ac55 3574 (p->throttled ? "" : "!"),
1c79356b
A
3575 (p->gobbled ? "" : "!"),
3576 (p->laundry ? "" : "!"),
3577 (p->free ? "" : "!"),
3578 (p->reference ? "" : "!"),
91447636 3579 (p->encrypted ? "" : "!"));
1c79356b
A
3580 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
3581 (p->busy ? "" : "!"),
3582 (p->wanted ? "" : "!"),
3583 (p->tabled ? "" : "!"),
3584 (p->fictitious ? "" : "!"),
3585 (p->private ? "" : "!"),
3586 (p->precious ? "" : "!"));
3587 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
3588 (p->absent ? "" : "!"),
3589 (p->error ? "" : "!"),
3590 (p->dirty ? "" : "!"),
3591 (p->cleaning ? "" : "!"),
3592 (p->pageout ? "" : "!"),
3593 (p->clustered ? "" : "!"));
2d21ac55 3594 iprintf("%soverwriting, %srestart, %sunusual\n",
1c79356b
A
3595 (p->overwriting ? "" : "!"),
3596 (p->restart ? "" : "!"),
0b4e3aa0 3597 (p->unusual ? "" : "!"));
1c79356b 3598
55e303ae 3599 iprintf("phys_page=0x%x", p->phys_page);
1c79356b
A
3600
3601 db_indent -= 2;
3602}
3603#endif /* MACH_KDB */