[apple/xnu.git] osfmk/vm/vm_resident.c (xnu-792.6.56)
1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23 /*
24 * @OSF_COPYRIGHT@
25 */
26 /*
27 * Mach Operating System
28 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
29 * All Rights Reserved.
30 *
31 * Permission to use, copy, modify and distribute this software and its
32 * documentation is hereby granted, provided that both the copyright
33 * notice and this permission notice appear in all copies of the
34 * software, derivative works or modified versions, and any portions
35 * thereof, and that both notices appear in supporting documentation.
36 *
37 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
38 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
39 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
40 *
41 * Carnegie Mellon requests users of this software to return to
42 *
43 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
44 * School of Computer Science
45 * Carnegie Mellon University
46 * Pittsburgh PA 15213-3890
47 *
48 * any improvements or extensions that they make and grant Carnegie Mellon
49 * the rights to redistribute these changes.
50 */
51 /*
52 */
53 /*
54 * File: vm/vm_page.c
55 * Author: Avadis Tevanian, Jr., Michael Wayne Young
56 *
57 * Resident memory management module.
58 */
59
60 #include <debug.h>
61
62 #include <mach/clock_types.h>
63 #include <mach/vm_prot.h>
64 #include <mach/vm_statistics.h>
65 #include <kern/counters.h>
66 #include <kern/sched_prim.h>
67 #include <kern/task.h>
68 #include <kern/thread.h>
69 #include <kern/zalloc.h>
70 #include <kern/xpr.h>
71 #include <vm/pmap.h>
72 #include <vm/vm_init.h>
73 #include <vm/vm_map.h>
74 #include <vm/vm_page.h>
75 #include <vm/vm_pageout.h>
76 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
77 #include <kern/misc_protos.h>
78 #include <zone_debug.h>
79 #include <vm/cpm.h>
80 #include <ppc/mappings.h> /* (BRINGUP) */
81 #include <pexpert/pexpert.h> /* (BRINGUP) */
82
83 #include <vm/vm_protos.h>
84
85 /* Variables used to indicate the relative age of pages in the
86 * inactive list
87 */
88
89 unsigned int vm_page_ticket_roll = 0;
90 unsigned int vm_page_ticket = 0;
91 /*
92 * Associated with each page of user-allocatable memory is a
93 * page structure.
94 */
95
96 /*
97 * These variables record the values returned by vm_page_bootstrap,
98 * for debugging purposes. The implementation of pmap_steal_memory
99 * and pmap_startup here also uses them internally.
100 */
101
102 vm_offset_t virtual_space_start;
103 vm_offset_t virtual_space_end;
104 int vm_page_pages;
105
106 /*
107 * The vm_page_lookup() routine, which provides for fast
108 * (virtual memory object, offset) to page lookup, employs
109 * the following hash table. The vm_page_{insert,remove}
110 * routines install and remove associations in the table.
111 * [This table is often called the virtual-to-physical,
112 * or VP, table.]
113 */
114 typedef struct {
115 vm_page_t pages;
116 #if MACH_PAGE_HASH_STATS
117 int cur_count; /* current count */
118 int hi_count; /* high water mark */
119 #endif /* MACH_PAGE_HASH_STATS */
120 } vm_page_bucket_t;
121
122 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
123 unsigned int vm_page_bucket_count = 0; /* How big is array? */
124 unsigned int vm_page_hash_mask; /* Mask for hash function */
125 unsigned int vm_page_hash_shift; /* Shift for hash function */
126 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
127 decl_simple_lock_data(,vm_page_bucket_lock)
128
129 vm_page_t
130 vm_page_lookup_nohint(vm_object_t object, vm_object_offset_t offset);
131
132
133 #if MACH_PAGE_HASH_STATS
134 /* This routine is only for debug. It is intended to be called by
135 * hand by a developer using a kernel debugger. This routine prints
136 * out vm_page_hash table statistics to the kernel debug console.
137 */
138 void
139 hash_debug(void)
140 {
141 int i;
142 int numbuckets = 0;
143 int highsum = 0;
144 int maxdepth = 0;
145
146 for (i = 0; i < vm_page_bucket_count; i++) {
147 if (vm_page_buckets[i].hi_count) {
148 numbuckets++;
149 highsum += vm_page_buckets[i].hi_count;
150 if (vm_page_buckets[i].hi_count > maxdepth)
151 maxdepth = vm_page_buckets[i].hi_count;
152 }
153 }
154 printf("Total number of buckets: %d\n", vm_page_bucket_count);
155 printf("Number used buckets: %d = %d%%\n",
156 numbuckets, 100*numbuckets/vm_page_bucket_count);
157 printf("Number unused buckets: %d = %d%%\n",
158 vm_page_bucket_count - numbuckets,
159 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
160 printf("Sum of bucket max depth: %d\n", highsum);
161 printf("Average bucket depth: %d.%2d\n",
162 highsum/vm_page_bucket_count,
163 highsum%vm_page_bucket_count);
164 printf("Maximum bucket depth: %d\n", maxdepth);
165 }
166 #endif /* MACH_PAGE_HASH_STATS */
167
168 /*
169 * The virtual page size is currently implemented as a runtime
170 * variable, but is constant once initialized using vm_set_page_size.
171 * This initialization must be done in the machine-dependent
172 * bootstrap sequence, before calling other machine-independent
173 * initializations.
174 *
175 * All references to the virtual page size outside this
176 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
177 * constants.
178 */
179 vm_size_t page_size = PAGE_SIZE;
180 vm_size_t page_mask = PAGE_MASK;
181 int page_shift = PAGE_SHIFT;
182
183 /*
184 * Resident page structures are initialized from
185 * a template (see vm_page_alloc).
186 *
187 * When adding a new field to the virtual memory
188 * object structure, be sure to add initialization
189 * (see vm_page_bootstrap).
190 */
191 struct vm_page vm_page_template;
192
193 /*
194 * Resident pages that represent real memory
195 * are allocated from a free list.
196 */
197 vm_page_t vm_page_queue_free;
198 vm_page_t vm_page_queue_fictitious;
199 unsigned int vm_page_free_wanted;
200 unsigned int vm_page_free_count;
201 unsigned int vm_page_fictitious_count;
202
203 unsigned int vm_page_free_count_minimum; /* debugging */
204
205 /*
206 * Occasionally, the virtual memory system uses
207 * resident page structures that do not refer to
208 * real pages, for example to leave a page with
209 * important state information in the VP table.
210 *
211 * These page structures are allocated the way
212 * most other kernel structures are.
213 */
214 zone_t vm_page_zone;
215 decl_mutex_data(,vm_page_alloc_lock)
216 unsigned int io_throttle_zero_fill;
217
218 /*
219 * Fictitious pages don't have a physical address,
220 * but we must initialize phys_page to something.
221 * For debugging, this should be a strange value
222 * that the pmap module can recognize in assertions.
223 */
224 vm_offset_t vm_page_fictitious_addr = (vm_offset_t) -1;
225
226 /*
227 * Resident page structures are also chained on
228 * queues that are used by the page replacement
229 * system (pageout daemon). These queues are
230 * defined here, but are shared by the pageout
231 * module. The inactive queue is broken into
232 * inactive and zf for convenience as the
233 * pageout daemon often assigns a higher
234 * affinity to zf pages.
235 */
236 queue_head_t vm_page_queue_active;
237 queue_head_t vm_page_queue_inactive;
238 unsigned int vm_page_active_count;
239 unsigned int vm_page_inactive_count;
240 unsigned int vm_page_wire_count;
241 unsigned int vm_page_gobble_count = 0;
242 unsigned int vm_page_wire_count_warning = 0;
243 unsigned int vm_page_gobble_count_warning = 0;
244
245 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
246 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
247
248 /*
249 * Several page replacement parameters are also
250 * shared with this module, so that page allocation
251 * (done here in vm_page_alloc) can trigger the
252 * pageout daemon.
253 */
254 unsigned int vm_page_free_target = 0;
255 unsigned int vm_page_free_min = 0;
256 unsigned int vm_page_inactive_target = 0;
257 unsigned int vm_page_free_reserved = 0;
258 unsigned int vm_page_throttled_count = 0;
259
260 /*
261 * The VM system has a couple of heuristics for deciding
262 * that pages are "uninteresting" and should be placed
263 * on the inactive queue as likely candidates for replacement.
264 * These variables let the heuristics be controlled at run-time
265 * to make experimentation easier.
266 */
267
268 boolean_t vm_page_deactivate_hint = TRUE;
269
270 /*
271 * vm_set_page_size:
272 *
273 * Sets the page size, perhaps based upon the memory
274 * size. Must be called before any use of page-size
275 * dependent functions.
276 *
277 * Sets page_shift and page_mask from page_size.
278 */
279 void
280 vm_set_page_size(void)
281 {
282 page_mask = page_size - 1;
283
284 if ((page_mask & page_size) != 0)
285 panic("vm_set_page_size: page size not a power of two");
286
287 for (page_shift = 0; ; page_shift++)
288 if ((1U << page_shift) == page_size)
289 break;
290 }
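/*
 * As a quick illustration (assuming the common 4 KB page size),
 * the values computed here come out to:
 *
 *	page_size  = 4096 (0x1000)
 *	page_mask  = 4095 (0x0FFF)	page_size - 1
 *	page_shift = 12			since (1 << 12) == 4096
 *
 * so the usual trunc_page()/round_page() operations reduce to
 * masking with ~page_mask.
 */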
291
292 /*
293 * vm_page_bootstrap:
294 *
295 * Initializes the resident memory module.
296 *
297 * Allocates memory for the page cells, and
298 * for the object/offset-to-page hash table headers.
299 * Each page cell is initialized and placed on the free list.
300 * Returns the range of available kernel virtual memory.
301 */
302
303 void
304 vm_page_bootstrap(
305 vm_offset_t *startp,
306 vm_offset_t *endp)
307 {
308 register vm_page_t m;
309 unsigned int i;
310 unsigned int log1;
311 unsigned int log2;
312 unsigned int size;
313
314 /*
315 * Initialize the vm_page template.
316 */
317
318 m = &vm_page_template;
319 m->object = VM_OBJECT_NULL; /* reset later */
320 m->offset = (vm_object_offset_t) -1; /* reset later */
321 m->wire_count = 0;
322
323 m->pageq.next = NULL;
324 m->pageq.prev = NULL;
325 m->listq.next = NULL;
326 m->listq.prev = NULL;
327
328 m->inactive = FALSE;
329 m->active = FALSE;
330 m->laundry = FALSE;
331 m->free = FALSE;
332 m->no_isync = TRUE;
333 m->reference = FALSE;
334 m->pageout = FALSE;
335 m->dump_cleaning = FALSE;
336 m->list_req_pending = FALSE;
337
338 m->busy = TRUE;
339 m->wanted = FALSE;
340 m->tabled = FALSE;
341 m->fictitious = FALSE;
342 m->private = FALSE;
343 m->absent = FALSE;
344 m->error = FALSE;
345 m->dirty = FALSE;
346 m->cleaning = FALSE;
347 m->precious = FALSE;
348 m->clustered = FALSE;
349 m->lock_supplied = FALSE;
350 m->unusual = FALSE;
351 m->restart = FALSE;
352 m->zero_fill = FALSE;
353 m->encrypted = FALSE;
354
355 m->phys_page = 0; /* reset later */
356
357 m->page_lock = VM_PROT_NONE;
358 m->unlock_request = VM_PROT_NONE;
359 m->page_error = KERN_SUCCESS;
360
361 /*
362 * Initialize the page queues.
363 */
364
365 mutex_init(&vm_page_queue_free_lock, 0);
366 mutex_init(&vm_page_queue_lock, 0);
367
368 vm_page_queue_free = VM_PAGE_NULL;
369 vm_page_queue_fictitious = VM_PAGE_NULL;
370 queue_init(&vm_page_queue_active);
371 queue_init(&vm_page_queue_inactive);
372 queue_init(&vm_page_queue_zf);
373
374 vm_page_free_wanted = 0;
375
376 /*
377 * Steal memory for the map and zone subsystems.
378 */
379
380 vm_map_steal_memory();
381 zone_steal_memory();
382
383 /*
384 * Allocate (and initialize) the virtual-to-physical
385 * table hash buckets.
386 *
387 * The number of buckets should be a power of two to
388 * get a good hash function. The following computation
389 * chooses the first power of two that is greater
390 * than the number of physical pages in the system.
391 */
392
393 simple_lock_init(&vm_page_bucket_lock, 0);
394
395 if (vm_page_bucket_count == 0) {
396 unsigned int npages = pmap_free_pages();
397
398 vm_page_bucket_count = 1;
399 while (vm_page_bucket_count < npages)
400 vm_page_bucket_count <<= 1;
401 }
402
403 vm_page_hash_mask = vm_page_bucket_count - 1;
404
405 /*
406 * Calculate object shift value for hashing algorithm:
407 * O = log2(sizeof(struct vm_object))
408 * B = log2(vm_page_bucket_count)
409 * hash shifts the object left by
410 * B/2 - O
411 */
412 size = vm_page_bucket_count;
413 for (log1 = 0; size > 1; log1++)
414 size /= 2;
415 size = sizeof(struct vm_object);
416 for (log2 = 0; size > 1; log2++)
417 size /= 2;
418 vm_page_hash_shift = log1/2 - log2 + 1;
419
420 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
421 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
422 	vm_page_bucket_hash |= 1;			/* Low bit must always be 1 to ensure a unique series */
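/*
 * Worked example of the constants above: if pmap_free_pages() had
 * reported, say, 2^18 pages, the loop above would pick
 * vm_page_bucket_count = 2^18, so log1 == 18 and
 *
 *	vm_page_bucket_hash = (1 << 9) | (1 << 4) | 1 = 0x211
 *
 * i.e. bits near the square root and fourth root of the table size,
 * with the low bit set so the multiplier stays odd.
 */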
423
424 if (vm_page_hash_mask & vm_page_bucket_count)
425 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
426
427 vm_page_buckets = (vm_page_bucket_t *)
428 pmap_steal_memory(vm_page_bucket_count *
429 sizeof(vm_page_bucket_t));
430
431 for (i = 0; i < vm_page_bucket_count; i++) {
432 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
433
434 bucket->pages = VM_PAGE_NULL;
435 #if MACH_PAGE_HASH_STATS
436 bucket->cur_count = 0;
437 bucket->hi_count = 0;
438 #endif /* MACH_PAGE_HASH_STATS */
439 }
440
441 /*
442 * Machine-dependent code allocates the resident page table.
443 * It uses vm_page_init to initialize the page frames.
444 * The code also returns to us the virtual space available
445 * to the kernel. We don't trust the pmap module
446 * to get the alignment right.
447 */
448
449 pmap_startup(&virtual_space_start, &virtual_space_end);
450 virtual_space_start = round_page(virtual_space_start);
451 virtual_space_end = trunc_page(virtual_space_end);
452
453 *startp = virtual_space_start;
454 *endp = virtual_space_end;
455
456 /*
457 * Compute the initial "wire" count.
458 * Up until now, the pages which have been set aside are not under
459 * the VM system's control, so although they aren't explicitly
460 * wired, they nonetheless can't be moved. At this moment,
461 * all VM managed pages are "free", courtesy of pmap_startup.
462 */
463 vm_page_wire_count = atop_64(max_mem) - vm_page_free_count; /* initial value */
464
465 printf("vm_page_bootstrap: %d free pages\n", vm_page_free_count);
466 vm_page_free_count_minimum = vm_page_free_count;
467
468 simple_lock_init(&vm_paging_lock, 0);
469 }
470
471 #ifndef MACHINE_PAGES
472 /*
473 * We implement pmap_steal_memory and pmap_startup with the help
474 * of two simpler functions, pmap_virtual_space and pmap_next_page.
475 */
476
477 void *
478 pmap_steal_memory(
479 vm_size_t size)
480 {
481 vm_offset_t addr, vaddr;
482 ppnum_t phys_page;
483
484 /*
485 	 * We round the size up to a multiple of the pointer size.
486 */
487
488 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
489
490 /*
491 * If this is the first call to pmap_steal_memory,
492 	 * we have to initialize ourselves.
493 */
494
495 if (virtual_space_start == virtual_space_end) {
496 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
497
498 /*
499 * The initial values must be aligned properly, and
500 * we don't trust the pmap module to do it right.
501 */
502
503 virtual_space_start = round_page(virtual_space_start);
504 virtual_space_end = trunc_page(virtual_space_end);
505 }
506
507 /*
508 * Allocate virtual memory for this request.
509 */
510
511 addr = virtual_space_start;
512 virtual_space_start += size;
513
514 kprintf("pmap_steal_memory: %08X - %08X; size=%08X\n", addr, virtual_space_start, size); /* (TEST/DEBUG) */
515
516 /*
517 * Allocate and map physical pages to back new virtual pages.
518 */
519
520 for (vaddr = round_page(addr);
521 vaddr < addr + size;
522 vaddr += PAGE_SIZE) {
523 if (!pmap_next_page(&phys_page))
524 panic("pmap_steal_memory");
525
526 /*
527 * XXX Logically, these mappings should be wired,
528 * but some pmap modules barf if they are.
529 */
530
531 pmap_enter(kernel_pmap, vaddr, phys_page,
532 VM_PROT_READ|VM_PROT_WRITE,
533 VM_WIMG_USE_DEFAULT, FALSE);
534 /*
535 * Account for newly stolen memory
536 */
537 vm_page_wire_count++;
538
539 }
540
541 return (void *) addr;
542 }
543
544 void
545 pmap_startup(
546 vm_offset_t *startp,
547 vm_offset_t *endp)
548 {
549 unsigned int i, npages, pages_initialized, fill, fillval;
550 vm_page_t pages;
551 ppnum_t phys_page;
552 addr64_t tmpaddr;
553
554 /*
555 * We calculate how many page frames we will have
556 * and then allocate the page structures in one chunk.
557 */
558
559 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
560 tmpaddr = tmpaddr + (addr64_t)(round_page_32(virtual_space_start) - virtual_space_start); /* Account for any slop */
561 	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*pages)));	/* Figure how many pages fit, leaving room for their vm_page_t structures */
562
563 pages = (vm_page_t) pmap_steal_memory(npages * sizeof *pages);
564
565 /*
566 * Initialize the page frames.
567 */
568
569 for (i = 0, pages_initialized = 0; i < npages; i++) {
570 if (!pmap_next_page(&phys_page))
571 break;
572
573 vm_page_init(&pages[i], phys_page);
574 vm_page_pages++;
575 pages_initialized++;
576 }
577
578 /*
579 * Release pages in reverse order so that physical pages
580 * initially get allocated in ascending addresses. This keeps
581 * the devices (which must address physical memory) happy if
582 * they require several consecutive pages.
583 */
584
585 /*
586 * Check if we want to initialize pages to a known value
587 */
588
589 fill = 0; /* Assume no fill */
590 if (PE_parse_boot_arg("fill", &fillval)) fill = 1; /* Set fill */
591
592 for (i = pages_initialized; i > 0; i--) {
593 		if(fill) fillPage(pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
594 vm_page_release(&pages[i - 1]);
595 }
596
597 #if 0
598 {
599 vm_page_t xx, xxo, xxl;
600 int j, k, l;
601
602 j = 0; /* (BRINGUP) */
603 xxl = 0;
604
605 for(xx = vm_page_queue_free; xx; xxl = xx, xx = xx->pageq.next) { /* (BRINGUP) */
606 j++; /* (BRINGUP) */
607 if(j > vm_page_free_count) { /* (BRINGUP) */
608 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
609 }
610
611 l = vm_page_free_count - j; /* (BRINGUP) */
612 k = 0; /* (BRINGUP) */
613
614 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
615
616 for(xxo = xx->pageq.next; xxo; xxo = xxo->pageq.next) { /* (BRINGUP) */
617 k++;
618 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
619 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
620 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
621 }
622 }
623 }
624
625 if(j != vm_page_free_count) { /* (BRINGUP) */
626 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
627 }
628 }
629 #endif
630
631
632 /*
633 * We have to re-align virtual_space_start,
634 * because pmap_steal_memory has been using it.
635 */
636
637 virtual_space_start = round_page_32(virtual_space_start);
638
639 *startp = virtual_space_start;
640 *endp = virtual_space_end;
641 }
642 #endif /* MACHINE_PAGES */
643
644 /*
645 * Routine: vm_page_module_init
646 * Purpose:
647 * Second initialization pass, to be done after
648 * the basic VM system is ready.
649 */
650 void
651 vm_page_module_init(void)
652 {
653 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
654 0, PAGE_SIZE, "vm pages");
655
656 #if ZONE_DEBUG
657 zone_debug_disable(vm_page_zone);
658 #endif /* ZONE_DEBUG */
659
660 zone_change(vm_page_zone, Z_EXPAND, FALSE);
661 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
662 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
663
664 /*
665 * Adjust zone statistics to account for the real pages allocated
666 * in vm_page_create(). [Q: is this really what we want?]
667 */
668 vm_page_zone->count += vm_page_pages;
669 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
670
671 mutex_init(&vm_page_alloc_lock, 0);
672 }
673
674 /*
675 * Routine: vm_page_create
676 * Purpose:
677 * After the VM system is up, machine-dependent code
678 * may stumble across more physical memory. For example,
679 * memory that it was reserving for a frame buffer.
680 * vm_page_create turns this memory into available pages.
681 */
682
683 void
684 vm_page_create(
685 ppnum_t start,
686 ppnum_t end)
687 {
688 ppnum_t phys_page;
689 vm_page_t m;
690
691 for (phys_page = start;
692 phys_page < end;
693 phys_page++) {
694 while ((m = (vm_page_t) vm_page_grab_fictitious())
695 == VM_PAGE_NULL)
696 vm_page_more_fictitious();
697
698 vm_page_init(m, phys_page);
699 vm_page_pages++;
700 vm_page_release(m);
701 }
702 }
703
704 /*
705 * vm_page_hash:
706 *
707 * Distributes the object/offset key pair among hash buckets.
708 *
709 * NOTE: The bucket count must be a power of 2
710 */
711 #define vm_page_hash(object, offset) (\
712 ( (natural_t)((uint32_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
713 & vm_page_hash_mask)
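/*
 * How the pieces fit together: the object pointer is scrambled by the
 * odd multiplier vm_page_bucket_hash, the page index (atop_64(offset))
 * is XORed with the same constant, and because vm_page_bucket_count is
 * a power of two the final "& vm_page_hash_mask" is equivalent to a
 * modulo by the bucket count.  Callers index the table directly, e.g.
 *
 *	bucket = &vm_page_buckets[vm_page_hash(object, offset)];
 *
 * as vm_page_insert(), vm_page_lookup() and vm_page_remove() do below.
 */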
714
715 /*
716 * vm_page_insert: [ internal use only ]
717 *
718  *	Inserts the given mem entry into the object/offset-page
719 * table and object list.
720 *
721 * The object must be locked.
722 */
723
724 void
725 vm_page_insert(
726 register vm_page_t mem,
727 register vm_object_t object,
728 register vm_object_offset_t offset)
729 {
730 register vm_page_bucket_t *bucket;
731
732 XPR(XPR_VM_PAGE,
733 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
734 (integer_t)object, (integer_t)offset, (integer_t)mem, 0,0);
735
736 VM_PAGE_CHECK(mem);
737 #if DEBUG
738 _mutex_assert(&object->Lock, MA_OWNED);
739
740 if (mem->tabled || mem->object != VM_OBJECT_NULL)
741 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
742 "already in (obj=%p,off=0x%llx)",
743 mem, object, offset, mem->object, mem->offset);
744 #endif
745 assert(!object->internal || offset < object->size);
746
747 /* only insert "pageout" pages into "pageout" objects,
748 * and normal pages into normal objects */
749 assert(object->pageout == mem->pageout);
750
751 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
752
753 /*
754 * Record the object/offset pair in this page
755 */
756
757 mem->object = object;
758 mem->offset = offset;
759
760 /*
761 	 *	Insert it into the object/offset hash table
762 */
763
764 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
765 simple_lock(&vm_page_bucket_lock);
766 mem->next = bucket->pages;
767 bucket->pages = mem;
768 #if MACH_PAGE_HASH_STATS
769 if (++bucket->cur_count > bucket->hi_count)
770 bucket->hi_count = bucket->cur_count;
771 #endif /* MACH_PAGE_HASH_STATS */
772 simple_unlock(&vm_page_bucket_lock);
773
774 /*
775 * Now link into the object's list of backed pages.
776 */
777
778 VM_PAGE_INSERT(mem, object);
779 mem->tabled = TRUE;
780
781 /*
782 * Show that the object has one more resident page.
783 */
784
785 object->resident_page_count++;
786
787 if (object->purgable == VM_OBJECT_PURGABLE_VOLATILE ||
788 object->purgable == VM_OBJECT_PURGABLE_EMPTY) {
789 vm_page_lock_queues();
790 vm_page_purgeable_count++;
791 vm_page_unlock_queues();
792 }
793 }
794
795 /*
796 * vm_page_replace:
797 *
798 * Exactly like vm_page_insert, except that we first
799 * remove any existing page at the given offset in object.
800 *
801 * The object and page queues must be locked.
802 */
803
804 void
805 vm_page_replace(
806 register vm_page_t mem,
807 register vm_object_t object,
808 register vm_object_offset_t offset)
809 {
810 register vm_page_bucket_t *bucket;
811
812 VM_PAGE_CHECK(mem);
813 #if DEBUG
814 _mutex_assert(&object->Lock, MA_OWNED);
815 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
816
817 if (mem->tabled || mem->object != VM_OBJECT_NULL)
818 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
819 "already in (obj=%p,off=0x%llx)",
820 mem, object, offset, mem->object, mem->offset);
821 #endif
822 /*
823 * Record the object/offset pair in this page
824 */
825
826 mem->object = object;
827 mem->offset = offset;
828
829 /*
830 	 *	Insert it into the object/offset hash table,
831 * replacing any page that might have been there.
832 */
833
834 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
835 simple_lock(&vm_page_bucket_lock);
836 if (bucket->pages) {
837 vm_page_t *mp = &bucket->pages;
838 register vm_page_t m = *mp;
839 do {
840 if (m->object == object && m->offset == offset) {
841 /*
842 * Remove page from bucket and from object,
843 * and return it to the free list.
844 */
845 *mp = m->next;
846 VM_PAGE_REMOVE(m);
847 m->tabled = FALSE;
848 m->object = VM_OBJECT_NULL;
849 m->offset = (vm_object_offset_t) -1;
850 object->resident_page_count--;
851
852 if (object->purgable == VM_OBJECT_PURGABLE_VOLATILE ||
853 object->purgable == VM_OBJECT_PURGABLE_EMPTY) {
854 assert(vm_page_purgeable_count > 0);
855 vm_page_purgeable_count--;
856 }
857
858 /*
859 * Return page to the free list.
860 * Note the page is not tabled now, so this
861 * won't self-deadlock on the bucket lock.
862 */
863
864 vm_page_free(m);
865 break;
866 }
867 mp = &m->next;
868 } while ((m = *mp));
869 mem->next = bucket->pages;
870 } else {
871 mem->next = VM_PAGE_NULL;
872 }
873 bucket->pages = mem;
874 simple_unlock(&vm_page_bucket_lock);
875
876 /*
877 * Now link into the object's list of backed pages.
878 */
879
880 VM_PAGE_INSERT(mem, object);
881 mem->tabled = TRUE;
882
883 /*
884 * And show that the object has one more resident
885 * page.
886 */
887
888 object->resident_page_count++;
889
890 if (object->purgable == VM_OBJECT_PURGABLE_VOLATILE ||
891 object->purgable == VM_OBJECT_PURGABLE_EMPTY) {
892 vm_page_purgeable_count++;
893 }
894 }
895
896 /*
897 * vm_page_remove: [ internal use only ]
898 *
899 * Removes the given mem entry from the object/offset-page
900 * table and the object page list.
901 *
902 * The object and page queues must be locked.
903 */
904
905 void
906 vm_page_remove(
907 register vm_page_t mem)
908 {
909 register vm_page_bucket_t *bucket;
910 register vm_page_t this;
911
912 XPR(XPR_VM_PAGE,
913 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
914 (integer_t)mem->object, (integer_t)mem->offset,
915 (integer_t)mem, 0,0);
916 #if DEBUG
917 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
918 _mutex_assert(&mem->object->Lock, MA_OWNED);
919 #endif
920 assert(mem->tabled);
921 assert(!mem->cleaning);
922 VM_PAGE_CHECK(mem);
923
924
925 /*
926 	 *	Remove from the object/offset hash table
927 */
928
929 bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)];
930 simple_lock(&vm_page_bucket_lock);
931 if ((this = bucket->pages) == mem) {
932 /* optimize for common case */
933
934 bucket->pages = mem->next;
935 } else {
936 register vm_page_t *prev;
937
938 for (prev = &this->next;
939 (this = *prev) != mem;
940 prev = &this->next)
941 continue;
942 *prev = this->next;
943 }
944 #if MACH_PAGE_HASH_STATS
945 bucket->cur_count--;
946 #endif /* MACH_PAGE_HASH_STATS */
947 simple_unlock(&vm_page_bucket_lock);
948
949 /*
950 * Now remove from the object's list of backed pages.
951 */
952
953 VM_PAGE_REMOVE(mem);
954
955 /*
956 * And show that the object has one fewer resident
957 * page.
958 */
959
960 mem->object->resident_page_count--;
961
962 if (mem->object->purgable == VM_OBJECT_PURGABLE_VOLATILE ||
963 mem->object->purgable == VM_OBJECT_PURGABLE_EMPTY) {
964 assert(vm_page_purgeable_count > 0);
965 vm_page_purgeable_count--;
966 }
967
968 mem->tabled = FALSE;
969 mem->object = VM_OBJECT_NULL;
970 mem->offset = (vm_object_offset_t) -1;
971 }
972
973 /*
974 * vm_page_lookup:
975 *
976 * Returns the page associated with the object/offset
977 * pair specified; if none is found, VM_PAGE_NULL is returned.
978 *
979 * The object must be locked. No side effects.
980 */
981
982 unsigned long vm_page_lookup_hint = 0;
983 unsigned long vm_page_lookup_hint_next = 0;
984 unsigned long vm_page_lookup_hint_prev = 0;
985 unsigned long vm_page_lookup_hint_miss = 0;
986
987 vm_page_t
988 vm_page_lookup(
989 register vm_object_t object,
990 register vm_object_offset_t offset)
991 {
992 register vm_page_t mem;
993 register vm_page_bucket_t *bucket;
994 queue_entry_t qe;
995 #if 0
996 _mutex_assert(&object->Lock, MA_OWNED);
997 #endif
998
999 mem = object->memq_hint;
1000 if (mem != VM_PAGE_NULL) {
1001 assert(mem->object == object);
1002 if (mem->offset == offset) {
1003 vm_page_lookup_hint++;
1004 return mem;
1005 }
1006 qe = queue_next(&mem->listq);
1007 if (! queue_end(&object->memq, qe)) {
1008 vm_page_t next_page;
1009
1010 next_page = (vm_page_t) qe;
1011 assert(next_page->object == object);
1012 if (next_page->offset == offset) {
1013 vm_page_lookup_hint_next++;
1014 object->memq_hint = next_page; /* new hint */
1015 return next_page;
1016 }
1017 }
1018 qe = queue_prev(&mem->listq);
1019 if (! queue_end(&object->memq, qe)) {
1020 vm_page_t prev_page;
1021
1022 prev_page = (vm_page_t) qe;
1023 assert(prev_page->object == object);
1024 if (prev_page->offset == offset) {
1025 vm_page_lookup_hint_prev++;
1026 object->memq_hint = prev_page; /* new hint */
1027 return prev_page;
1028 }
1029 }
1030 }
1031
1032 /*
1033 * Search the hash table for this object/offset pair
1034 */
1035
1036 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
1037
1038 simple_lock(&vm_page_bucket_lock);
1039 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1040 VM_PAGE_CHECK(mem);
1041 if ((mem->object == object) && (mem->offset == offset))
1042 break;
1043 }
1044 simple_unlock(&vm_page_bucket_lock);
1045
1046 if (mem != VM_PAGE_NULL) {
1047 if (object->memq_hint != VM_PAGE_NULL) {
1048 vm_page_lookup_hint_miss++;
1049 }
1050 assert(mem->object == object);
1051 object->memq_hint = mem;
1052 }
1053
1054 return(mem);
1055 }
1056
1057
1058 vm_page_t
1059 vm_page_lookup_nohint(
1060 vm_object_t object,
1061 vm_object_offset_t offset)
1062 {
1063 register vm_page_t mem;
1064 register vm_page_bucket_t *bucket;
1065
1066 #if 0
1067 _mutex_assert(&object->Lock, MA_OWNED);
1068 #endif
1069 /*
1070 * Search the hash table for this object/offset pair
1071 */
1072
1073 bucket = &vm_page_buckets[vm_page_hash(object, offset)];
1074
1075 simple_lock(&vm_page_bucket_lock);
1076 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1077 VM_PAGE_CHECK(mem);
1078 if ((mem->object == object) && (mem->offset == offset))
1079 break;
1080 }
1081 simple_unlock(&vm_page_bucket_lock);
1082
1083 return(mem);
1084 }
1085
1086 /*
1087 * vm_page_rename:
1088 *
1089 * Move the given memory entry from its
1090 * current object to the specified target object/offset.
1091 *
1092 * The object must be locked.
1093 */
1094 void
1095 vm_page_rename(
1096 register vm_page_t mem,
1097 register vm_object_t new_object,
1098 vm_object_offset_t new_offset)
1099 {
1100 assert(mem->object != new_object);
1101 /*
1102 * ENCRYPTED SWAP:
1103 * The encryption key is based on the page's memory object
1104 * (aka "pager") and paging offset. Moving the page to
1105 * another VM object changes its "pager" and "paging_offset"
1106 * so it has to be decrypted first.
1107 */
1108 if (mem->encrypted) {
1109 panic("vm_page_rename: page %p is encrypted\n", mem);
1110 }
1111 /*
1112 * Changes to mem->object require the page lock because
1113 * the pageout daemon uses that lock to get the object.
1114 */
1115
1116 XPR(XPR_VM_PAGE,
1117 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1118 (integer_t)new_object, (integer_t)new_offset,
1119 (integer_t)mem, 0,0);
1120
1121 vm_page_lock_queues();
1122 vm_page_remove(mem);
1123 vm_page_insert(mem, new_object, new_offset);
1124 vm_page_unlock_queues();
1125 }
1126
1127 /*
1128 * vm_page_init:
1129 *
1130 * Initialize the fields in a new page.
1131  *	This takes a structure with arbitrary (uninitialized) values and initializes it
1132 * so that it can be given to vm_page_release or vm_page_insert.
1133 */
1134 void
1135 vm_page_init(
1136 vm_page_t mem,
1137 ppnum_t phys_page)
1138 {
1139 assert(phys_page);
1140 *mem = vm_page_template;
1141 mem->phys_page = phys_page;
1142 }
1143
1144 /*
1145 * vm_page_grab_fictitious:
1146 *
1147 * Remove a fictitious page from the free list.
1148 * Returns VM_PAGE_NULL if there are no free pages.
1149 */
1150 int c_vm_page_grab_fictitious = 0;
1151 int c_vm_page_release_fictitious = 0;
1152 int c_vm_page_more_fictitious = 0;
1153
1154 vm_page_t
1155 vm_page_grab_fictitious(void)
1156 {
1157 register vm_page_t m;
1158
1159 m = (vm_page_t)zget(vm_page_zone);
1160 if (m) {
1161 vm_page_init(m, vm_page_fictitious_addr);
1162 m->fictitious = TRUE;
1163 }
1164
1165 c_vm_page_grab_fictitious++;
1166 return m;
1167 }
1168
1169 /*
1170 * vm_page_release_fictitious:
1171 *
1172 * Release a fictitious page to the free list.
1173 */
1174
1175 void
1176 vm_page_release_fictitious(
1177 register vm_page_t m)
1178 {
1179 assert(!m->free);
1180 assert(m->busy);
1181 assert(m->fictitious);
1182 assert(m->phys_page == vm_page_fictitious_addr);
1183
1184 c_vm_page_release_fictitious++;
1185 #if DEBUG
1186 if (m->free)
1187 panic("vm_page_release_fictitious");
1188 #endif
1189 m->free = TRUE;
1190 zfree(vm_page_zone, m);
1191 }
1192
1193 /*
1194 * vm_page_more_fictitious:
1195 *
1196 * Add more fictitious pages to the free list.
1197 * Allowed to block. This routine is way intimate
1198 * with the zones code, for several reasons:
1199 * 1. we need to carve some page structures out of physical
1200 * memory before zones work, so they _cannot_ come from
1201 * the zone_map.
1202 * 2. the zone needs to be collectable in order to prevent
1203 * growth without bound. These structures are used by
1204 * the device pager (by the hundreds and thousands), as
1205 * private pages for pageout, and as blocking pages for
1206 * pagein. Temporary bursts in demand should not result in
1207 * permanent allocation of a resource.
1208 * 3. To smooth allocation humps, we allocate single pages
1209 * with kernel_memory_allocate(), and cram them into the
1210 * zone. This also allows us to initialize the vm_page_t's
1211 * on the way into the zone, so that zget() always returns
1212 * an initialized structure. The zone free element pointer
1213 * and the free page pointer are both the first item in the
1214 * vm_page_t.
1215 * 4. By having the pages in the zone pre-initialized, we need
1216 * not keep 2 levels of lists. The garbage collector simply
1217 * scans our list, and reduces physical memory usage as it
1218 * sees fit.
1219 */
1220
1221 void vm_page_more_fictitious(void)
1222 {
1223 register vm_page_t m;
1224 vm_offset_t addr;
1225 kern_return_t retval;
1226 int i;
1227
1228 c_vm_page_more_fictitious++;
1229
1230 /*
1231 * Allocate a single page from the zone_map. Do not wait if no physical
1232 * pages are immediately available, and do not zero the space. We need
1233 * our own blocking lock here to prevent having multiple,
1234 * simultaneous requests from piling up on the zone_map lock. Exactly
1235 * one (of our) threads should be potentially waiting on the map lock.
1236 	 * If the winner is not vm-privileged, then the page allocation will fail,
1237 	 * and it will temporarily block here in vm_page_wait().
1238 */
1239 mutex_lock(&vm_page_alloc_lock);
1240 /*
1241 * If another thread allocated space, just bail out now.
1242 */
1243 if (zone_free_count(vm_page_zone) > 5) {
1244 /*
1245 * The number "5" is a small number that is larger than the
1246 * number of fictitious pages that any single caller will
1247 * attempt to allocate. Otherwise, a thread will attempt to
1248 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1249 * release all of the resources and locks already acquired,
1250 * and then call this routine. This routine finds the pages
1251 		 * that the caller released, so it fails to allocate new space.
1252 * The process repeats infinitely. The largest known number
1253 * of fictitious pages required in this manner is 2. 5 is
1254 * simply a somewhat larger number.
1255 */
1256 mutex_unlock(&vm_page_alloc_lock);
1257 return;
1258 }
1259
1260 retval = kernel_memory_allocate(zone_map,
1261 &addr, PAGE_SIZE, VM_PROT_ALL,
1262 KMA_KOBJECT|KMA_NOPAGEWAIT);
1263 if (retval != KERN_SUCCESS) {
1264 /*
1265 * No page was available. Tell the pageout daemon, drop the
1266 * lock to give another thread a chance at it, and
1267 * wait for the pageout daemon to make progress.
1268 */
1269 mutex_unlock(&vm_page_alloc_lock);
1270 vm_page_wait(THREAD_UNINT);
1271 return;
1272 }
1273 /*
1274 * Initialize as many vm_page_t's as will fit on this page. This
1275 * depends on the zone code disturbing ONLY the first item of
1276 * each zone element.
1277 */
1278 m = (vm_page_t)addr;
1279 for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
1280 vm_page_init(m, vm_page_fictitious_addr);
1281 m->fictitious = TRUE;
1282 m++;
1283 }
1284 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1285 mutex_unlock(&vm_page_alloc_lock);
1286 }
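/*
 * Typical caller pattern (see vm_page_create() above): retry the
 * non-blocking grab and let this routine, which may block, replenish
 * the zone whenever it comes up empty:
 *
 *	while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
 *		vm_page_more_fictitious();
 */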
1287
1288 /*
1289 * vm_page_convert:
1290 *
1291 * Attempt to convert a fictitious page into a real page.
1292 */
1293
1294 boolean_t
1295 vm_page_convert(
1296 register vm_page_t m)
1297 {
1298 register vm_page_t real_m;
1299
1300 assert(m->busy);
1301 assert(m->fictitious);
1302 assert(!m->dirty);
1303
1304 real_m = vm_page_grab();
1305 if (real_m == VM_PAGE_NULL)
1306 return FALSE;
1307
1308 m->phys_page = real_m->phys_page;
1309 m->fictitious = FALSE;
1310 m->no_isync = TRUE;
1311
1312 vm_page_lock_queues();
1313 if (m->active)
1314 vm_page_active_count++;
1315 else if (m->inactive)
1316 vm_page_inactive_count++;
1317 vm_page_unlock_queues();
1318
1319 real_m->phys_page = vm_page_fictitious_addr;
1320 real_m->fictitious = TRUE;
1321
1322 vm_page_release_fictitious(real_m);
1323 return TRUE;
1324 }
1325
1326 /*
1327 * vm_pool_low():
1328 *
1329 * Return true if it is not likely that a non-vm_privileged thread
1330 * can get memory without blocking. Advisory only, since the
1331 * situation may change under us.
1332 */
1333 int
1334 vm_pool_low(void)
1335 {
1336 /* No locking, at worst we will fib. */
1337 return( vm_page_free_count < vm_page_free_reserved );
1338 }
1339
1340 /*
1341 * vm_page_grab:
1342 *
1343 * Remove a page from the free list.
1344 * Returns VM_PAGE_NULL if the free list is too small.
1345 */
1346
1347 unsigned long vm_page_grab_count = 0; /* measure demand */
1348
1349 vm_page_t
1350 vm_page_grab(void)
1351 {
1352 register vm_page_t mem;
1353
1354 mutex_lock(&vm_page_queue_free_lock);
1355 vm_page_grab_count++;
1356
1357 /*
1358 * Optionally produce warnings if the wire or gobble
1359 * counts exceed some threshold.
1360 */
1361 if (vm_page_wire_count_warning > 0
1362 && vm_page_wire_count >= vm_page_wire_count_warning) {
1363 printf("mk: vm_page_grab(): high wired page count of %d\n",
1364 vm_page_wire_count);
1365 assert(vm_page_wire_count < vm_page_wire_count_warning);
1366 }
1367 if (vm_page_gobble_count_warning > 0
1368 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1369 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1370 vm_page_gobble_count);
1371 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1372 }
1373
1374 /*
1375 * Only let privileged threads (involved in pageout)
1376 * dip into the reserved pool.
1377 */
1378
1379 if ((vm_page_free_count < vm_page_free_reserved) &&
1380 !(current_thread()->options & TH_OPT_VMPRIV)) {
1381 mutex_unlock(&vm_page_queue_free_lock);
1382 mem = VM_PAGE_NULL;
1383 goto wakeup_pageout;
1384 }
1385
1386 while (vm_page_queue_free == VM_PAGE_NULL) {
1387 mutex_unlock(&vm_page_queue_free_lock);
1388 VM_PAGE_WAIT();
1389 mutex_lock(&vm_page_queue_free_lock);
1390 }
1391
1392 if (--vm_page_free_count < vm_page_free_count_minimum)
1393 vm_page_free_count_minimum = vm_page_free_count;
1394 mem = vm_page_queue_free;
1395 vm_page_queue_free = (vm_page_t) mem->pageq.next;
1396 mem->pageq.next = NULL;
1397 mem->pageq.prev = NULL;
1398 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1399 assert(mem->tabled == FALSE);
1400 assert(mem->object == VM_OBJECT_NULL);
1401 assert(!mem->laundry);
1402 mem->free = FALSE;
1403 mem->no_isync = TRUE;
1404 mutex_unlock(&vm_page_queue_free_lock);
1405
1406 assert(pmap_verify_free(mem->phys_page));
1407
1408 /*
1409 * Decide if we should poke the pageout daemon.
1410 * We do this if the free count is less than the low
1411 * water mark, or if the free count is less than the high
1412 * water mark (but above the low water mark) and the inactive
1413 * count is less than its target.
1414 *
1415 * We don't have the counts locked ... if they change a little,
1416 * it doesn't really matter.
1417 */
1418
1419 wakeup_pageout:
1420 if ((vm_page_free_count < vm_page_free_min) ||
1421 ((vm_page_free_count < vm_page_free_target) &&
1422 (vm_page_inactive_count < vm_page_inactive_target)))
1423 thread_wakeup((event_t) &vm_page_free_wanted);
1424
1425 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1426
1427 return mem;
1428 }
1429
1430 /*
1431 * vm_page_release:
1432 *
1433 * Return a page to the free list.
1434 */
1435
1436 void
1437 vm_page_release(
1438 register vm_page_t mem)
1439 {
1440
1441 #if 0
1442 unsigned int pindex;
1443 phys_entry *physent;
1444
1445 physent = mapping_phys_lookup(mem->phys_page, &pindex); /* (BRINGUP) */
1446 if(physent->ppLink & ppN) { /* (BRINGUP) */
1447 panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
1448 }
1449 physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */
1450 #endif
1451 assert(!mem->private && !mem->fictitious);
1452
1453 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1454
1455 mutex_lock(&vm_page_queue_free_lock);
1456 #if DEBUG
1457 if (mem->free)
1458 panic("vm_page_release");
1459 #endif
1460 mem->free = TRUE;
1461 assert(!mem->laundry);
1462 assert(mem->object == VM_OBJECT_NULL);
1463 assert(mem->pageq.next == NULL &&
1464 mem->pageq.prev == NULL);
1465 mem->pageq.next = (queue_entry_t) vm_page_queue_free;
1466 vm_page_queue_free = mem;
1467 vm_page_free_count++;
1468
1469 /*
1470 	 *	Check if we should wake up someone waiting for a page.
1471 * But don't bother waking them unless they can allocate.
1472 *
1473 * We wakeup only one thread, to prevent starvation.
1474 * Because the scheduling system handles wait queues FIFO,
1475 * if we wakeup all waiting threads, one greedy thread
1476 	 *	can starve multiple niceguy threads.  When the threads
1477 	 *	all wake up, the greedy thread runs first, grabs the page,
1478 	 *	and waits for another page.  It will be the first to run
1479 * when the next page is freed.
1480 *
1481 * However, there is a slight danger here.
1482 * The thread we wake might not use the free page.
1483 * Then the other threads could wait indefinitely
1484 * while the page goes unused. To forestall this,
1485 * the pageout daemon will keep making free pages
1486 * as long as vm_page_free_wanted is non-zero.
1487 */
1488
1489 if ((vm_page_free_wanted > 0) &&
1490 (vm_page_free_count >= vm_page_free_reserved)) {
1491 vm_page_free_wanted--;
1492 thread_wakeup_one((event_t) &vm_page_free_count);
1493 }
1494
1495 mutex_unlock(&vm_page_queue_free_lock);
1496 }
1497
1498 /*
1499 * vm_page_wait:
1500 *
1501 * Wait for a page to become available.
1502 * If there are plenty of free pages, then we don't sleep.
1503 *
1504 * Returns:
1505 * TRUE: There may be another page, try again
1506 * FALSE: We were interrupted out of our wait, don't try again
1507 */
1508
1509 boolean_t
1510 vm_page_wait(
1511 int interruptible )
1512 {
1513 /*
1514 * We can't use vm_page_free_reserved to make this
1515 * determination. Consider: some thread might
1516 * need to allocate two pages. The first allocation
1517 * succeeds, the second fails. After the first page is freed,
1518 * a call to vm_page_wait must really block.
1519 */
1520 kern_return_t wait_result;
1521 int need_wakeup = 0;
1522
1523 mutex_lock(&vm_page_queue_free_lock);
1524 if (vm_page_free_count < vm_page_free_target) {
1525 if (vm_page_free_wanted++ == 0)
1526 need_wakeup = 1;
1527 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
1528 mutex_unlock(&vm_page_queue_free_lock);
1529 counter(c_vm_page_wait_block++);
1530
1531 if (need_wakeup)
1532 thread_wakeup((event_t)&vm_page_free_wanted);
1533
1534 if (wait_result == THREAD_WAITING)
1535 wait_result = thread_block(THREAD_CONTINUE_NULL);
1536
1537 return(wait_result == THREAD_AWAKENED);
1538 } else {
1539 mutex_unlock(&vm_page_queue_free_lock);
1540 return TRUE;
1541 }
1542 }
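/*
 * Typical use, as in vm_page_part_zero_fill() below: retry a failed
 * vm_page_grab() after waiting for the pageout daemon to free memory.
 *
 *	for (;;) {
 *		mem = vm_page_grab();
 *		if (mem != VM_PAGE_NULL)
 *			break;
 *		vm_page_wait(THREAD_UNINT);
 *	}
 *
 * A FALSE return only matters for interruptible waits, where the
 * caller should give up instead of looping.
 */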
1543
1544 /*
1545 * vm_page_alloc:
1546 *
1547 * Allocate and return a memory cell associated
1548 * with this VM object/offset pair.
1549 *
1550 * Object must be locked.
1551 */
1552
1553 vm_page_t
1554 vm_page_alloc(
1555 vm_object_t object,
1556 vm_object_offset_t offset)
1557 {
1558 register vm_page_t mem;
1559
1560 #if DEBUG
1561 _mutex_assert(&object->Lock, MA_OWNED);
1562 #endif
1563 mem = vm_page_grab();
1564 if (mem == VM_PAGE_NULL)
1565 return VM_PAGE_NULL;
1566
1567 vm_page_insert(mem, object, offset);
1568
1569 return(mem);
1570 }
1571
1572 counter(unsigned int c_laundry_pages_freed = 0;)
1573
1574 int vm_pagein_cluster_unused = 0;
1575 boolean_t vm_page_free_verify = TRUE;
1576 /*
1577 * vm_page_free:
1578 *
1579 * Returns the given page to the free list,
1580  *	disassociating it from any VM object.
1581 *
1582 * Object and page queues must be locked prior to entry.
1583 */
1584 void
1585 vm_page_free(
1586 register vm_page_t mem)
1587 {
1588 vm_object_t object = mem->object;
1589
1590 assert(!mem->free);
1591 assert(!mem->cleaning);
1592 assert(!mem->pageout);
1593 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
1594 assert(pmap_verify_free(mem->phys_page));
1595 }
1596
1597 #if DEBUG
1598 if (mem->object)
1599 _mutex_assert(&mem->object->Lock, MA_OWNED);
1600 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
1601
1602 if (mem->free)
1603 panic("vm_page_free: freeing page on free list\n");
1604 #endif
1605 if (mem->tabled)
1606 vm_page_remove(mem); /* clears tabled, object, offset */
1607 VM_PAGE_QUEUES_REMOVE(mem); /* clears active or inactive */
1608
1609 if (mem->clustered) {
1610 mem->clustered = FALSE;
1611 vm_pagein_cluster_unused++;
1612 }
1613
1614 if (mem->wire_count) {
1615 if (!mem->private && !mem->fictitious)
1616 vm_page_wire_count--;
1617 mem->wire_count = 0;
1618 assert(!mem->gobbled);
1619 } else if (mem->gobbled) {
1620 if (!mem->private && !mem->fictitious)
1621 vm_page_wire_count--;
1622 vm_page_gobble_count--;
1623 }
1624 mem->gobbled = FALSE;
1625
1626 if (mem->laundry) {
1627 vm_pageout_throttle_up(mem);
1628 counter(++c_laundry_pages_freed);
1629 }
1630
1631 PAGE_WAKEUP(mem); /* clears wanted */
1632
1633 if (mem->absent)
1634 vm_object_absent_release(object);
1635
1636 /* Some of these may be unnecessary */
1637 mem->page_lock = 0;
1638 mem->unlock_request = 0;
1639 mem->busy = TRUE;
1640 mem->absent = FALSE;
1641 mem->error = FALSE;
1642 mem->dirty = FALSE;
1643 mem->precious = FALSE;
1644 mem->reference = FALSE;
1645 mem->encrypted = FALSE;
1646
1647 mem->page_error = KERN_SUCCESS;
1648
1649 if (mem->private) {
1650 mem->private = FALSE;
1651 mem->fictitious = TRUE;
1652 mem->phys_page = vm_page_fictitious_addr;
1653 }
1654 if (mem->fictitious) {
1655 vm_page_release_fictitious(mem);
1656 } else {
1657 /* depends on the queues lock */
1658 if(mem->zero_fill) {
1659 vm_zf_count-=1;
1660 mem->zero_fill = FALSE;
1661 }
1662 vm_page_init(mem, mem->phys_page);
1663 vm_page_release(mem);
1664 }
1665 }
1666
1667
1668 void
1669 vm_page_free_list(
1670 register vm_page_t mem)
1671 {
1672 register vm_page_t nxt;
1673 register vm_page_t first = NULL;
1674 register vm_page_t last = VM_PAGE_NULL;
1675 register int pg_count = 0;
1676
1677 #if DEBUG
1678 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
1679 #endif
1680 while (mem) {
1681 #if DEBUG
1682 if (mem->tabled || mem->object)
1683 panic("vm_page_free_list: freeing tabled page\n");
1684 if (mem->inactive || mem->active || mem->free)
1685 panic("vm_page_free_list: freeing page on list\n");
1686 #endif
1687 assert(mem->pageq.prev == NULL);
1688 nxt = (vm_page_t)(mem->pageq.next);
1689
1690 if (mem->clustered)
1691 vm_pagein_cluster_unused++;
1692
1693 if (mem->laundry) {
1694 vm_pageout_throttle_up(mem);
1695 counter(++c_laundry_pages_freed);
1696 }
1697 mem->busy = TRUE;
1698
1699 PAGE_WAKEUP(mem); /* clears wanted */
1700
1701 if (mem->private)
1702 mem->fictitious = TRUE;
1703
1704 if (!mem->fictitious) {
1705 /* depends on the queues lock */
1706 if (mem->zero_fill)
1707 vm_zf_count -= 1;
1708 assert(!mem->laundry);
1709 vm_page_init(mem, mem->phys_page);
1710
1711 mem->free = TRUE;
1712
1713 if (first == NULL)
1714 last = mem;
1715 mem->pageq.next = (queue_t) first;
1716 first = mem;
1717
1718 pg_count++;
1719 } else {
1720 mem->phys_page = vm_page_fictitious_addr;
1721 vm_page_release_fictitious(mem);
1722 }
1723 mem = nxt;
1724 }
1725 if (first) {
1726
1727 mutex_lock(&vm_page_queue_free_lock);
1728
1729 last->pageq.next = (queue_entry_t) vm_page_queue_free;
1730 vm_page_queue_free = first;
1731
1732 vm_page_free_count += pg_count;
1733
1734 if ((vm_page_free_wanted > 0) &&
1735 (vm_page_free_count >= vm_page_free_reserved)) {
1736 unsigned int available_pages;
1737
1738 if (vm_page_free_count >= vm_page_free_reserved) {
1739 available_pages = (vm_page_free_count
1740 - vm_page_free_reserved);
1741 } else {
1742 available_pages = 0;
1743 }
1744
1745 if (available_pages >= vm_page_free_wanted) {
1746 vm_page_free_wanted = 0;
1747 thread_wakeup((event_t) &vm_page_free_count);
1748 } else {
1749 while (available_pages--) {
1750 vm_page_free_wanted--;
1751 thread_wakeup_one((event_t) &vm_page_free_count);
1752 }
1753 }
1754 }
1755 mutex_unlock(&vm_page_queue_free_lock);
1756 }
1757 }
1758
1759
1760 /*
1761 * vm_page_wire:
1762 *
1763 * Mark this page as wired down by yet
1764 * another map, removing it from paging queues
1765 * as necessary.
1766 *
1767 * The page's object and the page queues must be locked.
1768 */
1769 void
1770 vm_page_wire(
1771 register vm_page_t mem)
1772 {
1773
1774 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
1775
1776 VM_PAGE_CHECK(mem);
1777 #if DEBUG
1778 if (mem->object)
1779 _mutex_assert(&mem->object->Lock, MA_OWNED);
1780 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
1781 #endif
1782 if (mem->wire_count == 0) {
1783 VM_PAGE_QUEUES_REMOVE(mem);
1784 if (!mem->private && !mem->fictitious && !mem->gobbled)
1785 vm_page_wire_count++;
1786 if (mem->gobbled)
1787 vm_page_gobble_count--;
1788 mem->gobbled = FALSE;
1789 if(mem->zero_fill) {
1790 /* depends on the queues lock */
1791 vm_zf_count-=1;
1792 mem->zero_fill = FALSE;
1793 }
1794 /*
1795 * ENCRYPTED SWAP:
1796 		 * The page could be encrypted, but
1797 		 * we don't have to decrypt it here
1798 * because we don't guarantee that the
1799 * data is actually valid at this point.
1800 * The page will get decrypted in
1801 * vm_fault_wire() if needed.
1802 */
1803 }
1804 assert(!mem->gobbled);
1805 mem->wire_count++;
1806 }
1807
1808 /*
1809 * vm_page_gobble:
1810 *
1811 * Mark this page as consumed by the vm/ipc/xmm subsystems.
1812 *
1813 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
1814 */
1815 void
1816 vm_page_gobble(
1817 register vm_page_t mem)
1818 {
1819 vm_page_lock_queues();
1820 VM_PAGE_CHECK(mem);
1821
1822 assert(!mem->gobbled);
1823 assert(mem->wire_count == 0);
1824
1825 if (!mem->gobbled && mem->wire_count == 0) {
1826 if (!mem->private && !mem->fictitious)
1827 vm_page_wire_count++;
1828 }
1829 vm_page_gobble_count++;
1830 mem->gobbled = TRUE;
1831 vm_page_unlock_queues();
1832 }
1833
1834 /*
1835 * vm_page_unwire:
1836 *
1837 * Release one wiring of this page, potentially
1838 * enabling it to be paged again.
1839 *
1840 * The page's object and the page queues must be locked.
1841 */
1842 void
1843 vm_page_unwire(
1844 register vm_page_t mem)
1845 {
1846
1847 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
1848
1849 VM_PAGE_CHECK(mem);
1850 assert(mem->wire_count > 0);
1851 #if DEBUG
1852 if (mem->object)
1853 _mutex_assert(&mem->object->Lock, MA_OWNED);
1854 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
1855 #endif
1856 if (--mem->wire_count == 0) {
1857 assert(!mem->private && !mem->fictitious);
1858 vm_page_wire_count--;
1859 assert(!mem->laundry);
1860 assert(mem->object != kernel_object);
1861 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
1862 queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
1863 vm_page_active_count++;
1864 mem->active = TRUE;
1865 mem->reference = TRUE;
1866 }
1867 }
1868
1869 /*
1870 * vm_page_deactivate:
1871 *
1872 * Returns the given page to the inactive list,
1873 * indicating that no physical maps have access
1874 * to this page. [Used by the physical mapping system.]
1875 *
1876 * The page queues must be locked.
1877 */
1878 void
1879 vm_page_deactivate(
1880 register vm_page_t m)
1881 {
1882 VM_PAGE_CHECK(m);
1883 assert(m->object != kernel_object);
1884
1885 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
1886 #if DEBUG
1887 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
1888 #endif
1889 /*
1890 * This page is no longer very interesting. If it was
1891 * interesting (active or inactive/referenced), then we
1892 * clear the reference bit and (re)enter it in the
1893 * inactive queue. Note wired pages should not have
1894 * their reference bit cleared.
1895 */
1896 if (m->gobbled) { /* can this happen? */
1897 assert(m->wire_count == 0);
1898 if (!m->private && !m->fictitious)
1899 vm_page_wire_count--;
1900 vm_page_gobble_count--;
1901 m->gobbled = FALSE;
1902 }
1903 if (m->private || (m->wire_count != 0))
1904 return;
1905 if (m->active || (m->inactive && m->reference)) {
1906 if (!m->fictitious && !m->absent)
1907 pmap_clear_reference(m->phys_page);
1908 m->reference = FALSE;
1909 VM_PAGE_QUEUES_REMOVE(m);
1910 }
1911 if (m->wire_count == 0 && !m->inactive) {
1912 m->page_ticket = vm_page_ticket;
1913 vm_page_ticket_roll++;
1914
1915 if(vm_page_ticket_roll == VM_PAGE_TICKETS_IN_ROLL) {
1916 vm_page_ticket_roll = 0;
1917 if(vm_page_ticket == VM_PAGE_TICKET_ROLL_IDS)
1918 vm_page_ticket= 0;
1919 else
1920 vm_page_ticket++;
1921 }
1922
1923 assert(!m->laundry);
1924 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
1925 if(m->zero_fill) {
1926 queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
1927 } else {
1928 queue_enter(&vm_page_queue_inactive,
1929 m, vm_page_t, pageq);
1930 }
1931
1932 m->inactive = TRUE;
1933 if (!m->fictitious)
1934 vm_page_inactive_count++;
1935 }
1936 }
1937
1938 /*
1939 * vm_page_activate:
1940 *
1941 * Put the specified page on the active list (if appropriate).
1942 *
1943 * The page queues must be locked.
1944 */
1945
1946 void
1947 vm_page_activate(
1948 register vm_page_t m)
1949 {
1950 VM_PAGE_CHECK(m);
1951 assert(m->object != kernel_object);
1952 #if DEBUG
1953 _mutex_assert(&vm_page_queue_lock, MA_OWNED);
1954 #endif
1955 if (m->gobbled) {
1956 assert(m->wire_count == 0);
1957 if (!m->private && !m->fictitious)
1958 vm_page_wire_count--;
1959 vm_page_gobble_count--;
1960 m->gobbled = FALSE;
1961 }
1962 if (m->private)
1963 return;
1964
1965 if (m->inactive) {
1966 assert(!m->laundry);
1967 if (m->zero_fill) {
1968 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq);
1969 } else {
1970 queue_remove(&vm_page_queue_inactive,
1971 m, vm_page_t, pageq);
1972 }
1973 m->pageq.next = NULL;
1974 m->pageq.prev = NULL;
1975 if (!m->fictitious)
1976 vm_page_inactive_count--;
1977 m->inactive = FALSE;
1978 }
1979 if (m->wire_count == 0) {
1980 #if DEBUG
1981 if (m->active)
1982 panic("vm_page_activate: already active");
1983 #endif
1984 assert(!m->laundry);
1985 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
1986 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
1987 m->active = TRUE;
1988 m->reference = TRUE;
1989 if (!m->fictitious)
1990 vm_page_active_count++;
1991 }
1992 }
1993
1994 /*
1995 * vm_page_part_zero_fill:
1996 *
1997 * Zero-fill a part of the page.
1998 */
1999 void
2000 vm_page_part_zero_fill(
2001 vm_page_t m,
2002 vm_offset_t m_pa,
2003 vm_size_t len)
2004 {
2005 vm_page_t tmp;
2006
2007 VM_PAGE_CHECK(m);
2008 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
2009 pmap_zero_part_page(m->phys_page, m_pa, len);
2010 #else
2011 while (1) {
2012 tmp = vm_page_grab();
2013 if (tmp == VM_PAGE_NULL) {
2014 vm_page_wait(THREAD_UNINT);
2015 continue;
2016 }
2017 break;
2018 }
2019 vm_page_zero_fill(tmp);
2020 if (m_pa != 0) {
2021 vm_page_part_copy(m, 0, tmp, 0, m_pa);
2022 }
2023 if ((m_pa + len) < PAGE_SIZE) {
2024 vm_page_part_copy(m, m_pa + len, tmp,
2025 m_pa + len, PAGE_SIZE - (m_pa + len));
2026 }
2027 vm_page_copy(tmp, m);
2028 vm_page_lock_queues();
2029 vm_page_free(tmp);
2030 vm_page_unlock_queues();
2031 #endif
2032
2033 }
2034
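/*
 * Illustrative sketch, not part of the original source and not compiled:
 * using vm_page_part_zero_fill() to clear the portion of a page beyond its
 * valid data, e.g. after a short read. The helper name and the
 * `valid_bytes` parameter are hypothetical.
 */
#if 0 /* example only */
static void
example_zero_tail(vm_page_t m, vm_size_t valid_bytes)
{
	if (valid_bytes < PAGE_SIZE)
		vm_page_part_zero_fill(m, valid_bytes, PAGE_SIZE - valid_bytes);
}
#endif
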
2035 /*
2036 * vm_page_zero_fill:
2037 *
2038 * Zero-fill the specified page.
2039 */
2040 void
2041 vm_page_zero_fill(
2042 vm_page_t m)
2043 {
2044 XPR(XPR_VM_PAGE,
2045 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
2046 (integer_t)m->object, (integer_t)m->offset, (integer_t)m, 0,0);
2047
2048 VM_PAGE_CHECK(m);
2049
2050 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
2051 pmap_zero_page(m->phys_page);
2052 }
2053
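/*
 * Illustrative sketch, not part of the original source and not compiled:
 * grabbing a free page and zeroing it before use, following the same
 * grab/wait pattern used by vm_page_part_zero_fill() above. The helper
 * name is hypothetical.
 */
#if 0 /* example only */
static vm_page_t
example_grab_zeroed_page(void)
{
	vm_page_t m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		vm_page_wait(THREAD_UNINT);
	vm_page_zero_fill(m);
	return m;
}
#endif
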
2054 /*
2055 * vm_page_part_copy:
2056 *
2057 * Copy part of one page to another
2058 */
2059
2060 void
2061 vm_page_part_copy(
2062 vm_page_t src_m,
2063 vm_offset_t src_pa,
2064 vm_page_t dst_m,
2065 vm_offset_t dst_pa,
2066 vm_size_t len)
2067 {
2068 VM_PAGE_CHECK(src_m);
2069 VM_PAGE_CHECK(dst_m);
2070
2071 pmap_copy_part_page(src_m->phys_page, src_pa,
2072 dst_m->phys_page, dst_pa, len);
2073 }
2074
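/*
 * Illustrative sketch, not part of the original source and not compiled:
 * copying only the leading `len` bytes of one page into another with
 * vm_page_part_copy(). The helper name is hypothetical; len must not
 * exceed PAGE_SIZE.
 */
#if 0 /* example only */
static void
example_copy_head(vm_page_t src, vm_page_t dst, vm_size_t len)
{
	assert(len <= PAGE_SIZE);
	vm_page_part_copy(src, 0, dst, 0, len);
}
#endif
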
2075 /*
2076 * vm_page_copy:
2077 *
2078 * Copy one page to another
2079 *
2080 * ENCRYPTED SWAP:
2081 * The source page should not be encrypted. The caller should
2082 * make sure the page is decrypted first, if necessary.
2083 */
2084
2085 void
2086 vm_page_copy(
2087 vm_page_t src_m,
2088 vm_page_t dest_m)
2089 {
2090 XPR(XPR_VM_PAGE,
2091 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
2092 (integer_t)src_m->object, src_m->offset,
2093 (integer_t)dest_m->object, dest_m->offset,
2094 0);
2095
2096 VM_PAGE_CHECK(src_m);
2097 VM_PAGE_CHECK(dest_m);
2098
2099 /*
2100 * ENCRYPTED SWAP:
2101 * The source page should not be encrypted at this point.
2102 * The destination page will therefore not contain encrypted
2103 * data after the copy.
2104 */
2105 if (src_m->encrypted) {
2106 panic("vm_page_copy: source page %p is encrypted\n", src_m);
2107 }
2108 dest_m->encrypted = FALSE;
2109
2110 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
2111 }
2112
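/*
 * Illustrative sketch, not part of the original source and not compiled:
 * a caller of vm_page_copy() must ensure the source page has been
 * decrypted first (see the ENCRYPTED SWAP note above), otherwise the
 * routine panics. The helper name is hypothetical.
 */
#if 0 /* example only */
static void
example_copy_page(vm_page_t src, vm_page_t dst)
{
	assert(!src->encrypted);
	vm_page_copy(src, dst);
}
#endif
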
2113 /*
2114 * Currently, this is a primitive allocator that grabs
2115 * free pages from the system, sorts them by physical
2116 * address, then searches for a region large enough to
2117 * satisfy the user's request.
2118 *
2119 * Additional levels of effort:
2120 * + steal clean active/inactive pages
2121 * + force pageouts of dirty pages
2122 * + maintain a map of available physical
2123 * memory
2124 */
2125
2126 #if MACH_ASSERT
2127 /*
2128 * Check that the list of pages is ordered by
2129 * ascending physical address and has no holes.
2130 */
2131 int vm_page_verify_contiguous(
2132 vm_page_t pages,
2133 unsigned int npages);
2134
2135 int
2136 vm_page_verify_contiguous(
2137 vm_page_t pages,
2138 unsigned int npages)
2139 {
2140 register vm_page_t m;
2141 unsigned int page_count;
2142 vm_offset_t prev_addr;
2143
2144 prev_addr = pages->phys_page;
2145 page_count = 1;
2146 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
2147 if (m->phys_page != prev_addr + 1) {
2148 printf("m 0x%x prev_addr 0x%x, current addr 0x%x\n",
2149 m, prev_addr, m->phys_page);
2150 printf("pages 0x%x page_count %d\n", pages, page_count);
2151 panic("vm_page_verify_contiguous: not contiguous!");
2152 }
2153 prev_addr = m->phys_page;
2154 ++page_count;
2155 }
2156 if (page_count != npages) {
2157 printf("pages 0x%x actual count 0x%x but requested 0x%x\n",
2158 pages, page_count, npages);
2159 panic("vm_page_verify_contiguous: count error");
2160 }
2161 return 1;
2162 }
2163 #endif /* MACH_ASSERT */
2164
2165
2166 cpm_counter(unsigned int vpfls_pages_handled = 0;)
2167 cpm_counter(unsigned int vpfls_head_insertions = 0;)
2168 cpm_counter(unsigned int vpfls_tail_insertions = 0;)
2169 cpm_counter(unsigned int vpfls_general_insertions = 0;)
2170 cpm_counter(unsigned int vpfc_failed = 0;)
2171 cpm_counter(unsigned int vpfc_satisfied = 0;)
2172
2173 /*
2174 * Find a region large enough to contain at least npages
2175 * of contiguous physical memory.
2176 *
2177 * Requirements:
2178 * - Called while holding vm_page_queue_free_lock.
2179 * - Doesn't respect vm_page_free_reserved; caller
2180 * must not ask for more pages than are legal to grab.
2181 *
2182 * Returns a pointer to a list of gobbled pages or VM_PAGE_NULL.
2183 *
2184 * Algorithm:
2185 * Loop over the free list, extracting one page at a time and
2186 * inserting those into a sorted sub-list. We stop as soon as
2187 * there's a contiguous range within the sorted list that can
2188 * satisfy the contiguous memory request. This contiguous sub-
2189 * list is chopped out of the sorted sub-list and the remainder
2190 * of the sorted sub-list is put back onto the beginning of the
2191 * free list.
2192 */
2193 static vm_page_t
2194 vm_page_find_contiguous(
2195 unsigned int contig_pages)
2196 {
2197 vm_page_t sort_list;
2198 vm_page_t *contfirstprev, contlast;
2199 vm_page_t m, m1;
2200 ppnum_t prevcontaddr;
2201 ppnum_t nextcontaddr;
2202 unsigned int npages;
2203
2204 m = NULL;
2205 #if DEBUG
2206 _mutex_assert(&vm_page_queue_free_lock, MA_OWNED);
2207 #endif
2208 #if MACH_ASSERT
2209 /*
2210 * Verify pages in the free list.
2211 */
2212 npages = 0;
2213 for (m = vm_page_queue_free; m != VM_PAGE_NULL; m = NEXT_PAGE(m))
2214 ++npages;
2215 if (npages != vm_page_free_count)
2216 panic("vm_sort_free_list: prelim: npages %u free_count %d",
2217 npages, vm_page_free_count);
2218 #endif /* MACH_ASSERT */
2219
2220 if (contig_pages == 0 || vm_page_queue_free == VM_PAGE_NULL)
2221 return VM_PAGE_NULL;
2222
2223 #define PPNUM_PREV(x) (((x) > 0) ? ((x) - 1) : 0)
2224 #define PPNUM_NEXT(x) (((x) < PPNUM_MAX) ? ((x) + 1) : PPNUM_MAX)
2225 #define SET_NEXT_PAGE(m,n) ((m)->pageq.next = (struct queue_entry *) (n))
2226
2227 npages = 1;
2228 contfirstprev = &sort_list;
2229 contlast = sort_list = vm_page_queue_free;
2230 vm_page_queue_free = NEXT_PAGE(sort_list);
2231 SET_NEXT_PAGE(sort_list, VM_PAGE_NULL);
2232 prevcontaddr = PPNUM_PREV(sort_list->phys_page);
2233 nextcontaddr = PPNUM_NEXT(sort_list->phys_page);
2234
2235 while (npages < contig_pages &&
2236 (m = vm_page_queue_free) != VM_PAGE_NULL)
2237 {
2238 cpm_counter(++vpfls_pages_handled);
2239
2240 /* prepend to existing run? */
2241 if (m->phys_page == prevcontaddr)
2242 {
2243 vm_page_queue_free = NEXT_PAGE(m);
2244 cpm_counter(++vpfls_head_insertions);
2245 prevcontaddr = PPNUM_PREV(prevcontaddr);
2246 SET_NEXT_PAGE(m, *contfirstprev);
2247 *contfirstprev = m;
2248 npages++;
2249 continue; /* no tail expansion check needed */
2250 }
2251
2252 /* append to tail of existing run? */
2253 else if (m->phys_page == nextcontaddr)
2254 {
2255 vm_page_queue_free = NEXT_PAGE(m);
2256 cpm_counter(++vpfls_tail_insertions);
2257 nextcontaddr = PPNUM_NEXT(nextcontaddr);
2258 SET_NEXT_PAGE(m, NEXT_PAGE(contlast));
2259 SET_NEXT_PAGE(contlast, m);
2260 contlast = m;
2261 npages++;
2262 }
2263
2264 /* prepend to the very front of sorted list? */
2265 else if (m->phys_page < sort_list->phys_page)
2266 {
2267 vm_page_queue_free = NEXT_PAGE(m);
2268 cpm_counter(++vpfls_general_insertions);
2269 prevcontaddr = PPNUM_PREV(m->phys_page);
2270 nextcontaddr = PPNUM_NEXT(m->phys_page);
2271 SET_NEXT_PAGE(m, sort_list);
2272 contfirstprev = &sort_list;
2273 contlast = sort_list = m;
2274 npages = 1;
2275 }
2276
2277 else /* get to proper place for insertion */
2278 {
2279 if (m->phys_page < nextcontaddr)
2280 {
2281 prevcontaddr = PPNUM_PREV(sort_list->phys_page);
2282 nextcontaddr = PPNUM_NEXT(sort_list->phys_page);
2283 contfirstprev = &sort_list;
2284 contlast = sort_list;
2285 npages = 1;
2286 }
2287 for (m1 = NEXT_PAGE(contlast);
2288 npages < contig_pages &&
2289 m1 != VM_PAGE_NULL && m1->phys_page < m->phys_page;
2290 m1 = NEXT_PAGE(m1))
2291 {
2292 if (m1->phys_page != nextcontaddr) {
2293 prevcontaddr = PPNUM_PREV(m1->phys_page);
2294 contfirstprev = NEXT_PAGE_PTR(contlast);
2295 npages = 1;
2296 } else {
2297 npages++;
2298 }
2299 nextcontaddr = PPNUM_NEXT(m1->phys_page);
2300 contlast = m1;
2301 }
2302
2303 /*
2304 * We may actually already have enough.
2305 * This could happen if a previous prepend
2306 * joined up two runs to meet our needs.
2307 * If so, bail before we take the current
2308 * page off the free queue.
2309 */
2310 if (npages == contig_pages)
2311 break;
2312
2313 if (m->phys_page != nextcontaddr)
2314 {
2315 contfirstprev = NEXT_PAGE_PTR(contlast);
2316 prevcontaddr = PPNUM_PREV(m->phys_page);
2317 nextcontaddr = PPNUM_NEXT(m->phys_page);
2318 npages = 1;
2319 } else {
2320 nextcontaddr = PPNUM_NEXT(nextcontaddr);
2321 npages++;
2322 }
2323 vm_page_queue_free = NEXT_PAGE(m);
2324 cpm_counter(++vpfls_general_insertions);
2325 SET_NEXT_PAGE(m, NEXT_PAGE(contlast));
2326 SET_NEXT_PAGE(contlast, m);
2327 contlast = m;
2328 }
2329
2330 /* See how many pages are now contiguous after the insertion */
2331 for (m1 = NEXT_PAGE(m);
2332 npages < contig_pages &&
2333 m1 != VM_PAGE_NULL && m1->phys_page == nextcontaddr;
2334 m1 = NEXT_PAGE(m1))
2335 {
2336 nextcontaddr = PPNUM_NEXT(nextcontaddr);
2337 contlast = m1;
2338 npages++;
2339 }
2340 }
2341
2342 /* how did we do? */
2343 if (npages == contig_pages)
2344 {
2345 cpm_counter(++vpfc_satisfied);
2346
2347 /* remove the contiguous range from the sorted list */
2348 m = *contfirstprev;
2349 *contfirstprev = NEXT_PAGE(contlast);
2350 SET_NEXT_PAGE(contlast, VM_PAGE_NULL);
2351 assert(vm_page_verify_contiguous(m, npages));
2352
2353 /* inline vm_page_gobble() for each returned page */
2354 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
2355 assert(m1->free);
2356 assert(!m1->wanted);
2357 assert(!m1->laundry);
2358 m1->free = FALSE;
2359 m1->no_isync = TRUE;
2360 m1->gobbled = TRUE;
2361 }
2362 vm_page_wire_count += npages;
2363 vm_page_gobble_count += npages;
2364 vm_page_free_count -= npages;
2365
2366 /* stick free list at the tail of the sorted list */
2367 while ((m1 = *contfirstprev) != VM_PAGE_NULL)
2368 contfirstprev = (vm_page_t *)&m1->pageq.next;
2369 *contfirstprev = vm_page_queue_free;
2370 }
2371
2372 vm_page_queue_free = sort_list;
2373 return m;
2374 }
2375
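/*
 * Illustrative sketch, not part of the original source and not compiled:
 * the termination test that the loop above maintains incrementally, shown
 * in its simplest form. Given an array of physical page numbers already
 * sorted in ascending order, return the index where a run of `contig_pages`
 * consecutive page numbers begins, or -1 if there is none. The helper name
 * and the array representation are hypothetical; the kernel code instead
 * builds its sorted sub-list one page at a time and stops as soon as such
 * a run exists.
 */
#if 0 /* example only */
static int
example_find_run(ppnum_t *sorted, unsigned int count, unsigned int contig_pages)
{
	unsigned int i, run_start = 0, run_len = 1;

	if (count == 0 || contig_pages == 0 || contig_pages > count)
		return -1;

	for (i = 1; i < count && run_len < contig_pages; i++) {
		if (sorted[i] == sorted[i - 1] + 1) {
			run_len++;		/* extends the current run */
		} else {
			run_start = i;		/* run broken; start over here */
			run_len = 1;
		}
	}
	return (run_len >= contig_pages) ? (int)run_start : -1;
}
#endif
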
2376 /*
2377 * Allocate a list of contiguous, wired pages.
2378 */
2379 kern_return_t
2380 cpm_allocate(
2381 vm_size_t size,
2382 vm_page_t *list,
2383 boolean_t wire)
2384 {
2385 register vm_page_t m;
2386 vm_page_t pages;
2387 unsigned int npages;
2388 unsigned int vm_pages_available;
2389 boolean_t wakeup;
2390
2391 if (size % page_size != 0)
2392 return KERN_INVALID_ARGUMENT;
2393
2394 vm_page_lock_queues();
2395 mutex_lock(&vm_page_queue_free_lock);
2396
2397 /*
2398 * Should also take active and inactive pages
2399 * into account... One day...
2400 */
2401 npages = size / page_size;
2402 vm_pages_available = vm_page_free_count - vm_page_free_reserved;
2403
2404 if (npages > vm_pages_available) {
2405 mutex_unlock(&vm_page_queue_free_lock);
2406 vm_page_unlock_queues();
2407 return KERN_RESOURCE_SHORTAGE;
2408 }
2409
2410 /*
2411 * Obtain a pointer to a subset of the free
2412 * list large enough to satisfy the request;
2413 * the region will be physically contiguous.
2414 */
2415 pages = vm_page_find_contiguous(npages);
2416
2417 /* adjust global freelist counts and determine need for wakeups */
2418 if (vm_page_free_count < vm_page_free_count_minimum)
2419 vm_page_free_count_minimum = vm_page_free_count;
2420
2421 wakeup = ((vm_page_free_count < vm_page_free_min) ||
2422 ((vm_page_free_count < vm_page_free_target) &&
2423 (vm_page_inactive_count < vm_page_inactive_target)));
2424
2425 mutex_unlock(&vm_page_queue_free_lock);
2426
2427 if (pages == VM_PAGE_NULL) {
2428 vm_page_unlock_queues();
2429 return KERN_NO_SPACE;
2430 }
2431
2432 /*
2433 * Walk the returned list, wiring the pages.
2434 */
2435 if (wire == TRUE)
2436 for (m = pages; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
2437 /*
2438 * Essentially inlined vm_page_wire.
2439 */
2440 assert(!m->active);
2441 assert(!m->inactive);
2442 assert(!m->private);
2443 assert(!m->fictitious);
2444 assert(m->wire_count == 0);
2445 assert(m->gobbled);
2446 m->gobbled = FALSE;
2447 m->wire_count++;
2448 --vm_page_gobble_count;
2449 }
2450 vm_page_unlock_queues();
2451
2452 if (wakeup)
2453 thread_wakeup((event_t) &vm_page_free_wanted);
2454
2455 /*
2456 * The CPM pages should now be available and
2457 * ordered by ascending physical address.
2458 */
2459 assert(vm_page_verify_contiguous(pages, npages));
2460
2461 *list = pages;
2462 return KERN_SUCCESS;
2463 }
2464
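/*
 * Illustrative sketch, not part of the original source and not compiled:
 * how a caller might request `npages` of physically contiguous, wired
 * memory through cpm_allocate(). The size must be a multiple of page_size,
 * or KERN_INVALID_ARGUMENT is returned. The helper name is hypothetical.
 */
#if 0 /* example only */
static kern_return_t
example_grab_contiguous(unsigned int npages, vm_page_t *out_pages)
{
	return cpm_allocate((vm_size_t)npages * page_size, out_pages, TRUE);
}
#endif
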
2465
2466 #include <mach_vm_debug.h>
2467 #if MACH_VM_DEBUG
2468
2469 #include <mach_debug/hash_info.h>
2470 #include <vm/vm_debug.h>
2471
2472 /*
2473 * Routine: vm_page_info
2474 * Purpose:
2475 * Return information about the global VP table.
2476 * Fills the buffer with as much information as possible
2477 * and returns the desired size of the buffer.
2478 * Conditions:
2479 * Nothing locked. The caller should provide
2480 * possibly-pageable memory.
2481 */
2482
2483 unsigned int
2484 vm_page_info(
2485 hash_info_bucket_t *info,
2486 unsigned int count)
2487 {
2488 unsigned int i;
2489
2490 if (vm_page_bucket_count < count)
2491 count = vm_page_bucket_count;
2492
2493 for (i = 0; i < count; i++) {
2494 vm_page_bucket_t *bucket = &vm_page_buckets[i];
2495 unsigned int bucket_count = 0;
2496 vm_page_t m;
2497
2498 simple_lock(&vm_page_bucket_lock);
2499 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
2500 bucket_count++;
2501 simple_unlock(&vm_page_bucket_lock);
2502
2503 /* don't touch pageable memory while holding locks */
2504 info[i].hib_count = bucket_count;
2505 }
2506
2507 return vm_page_bucket_count;
2508 }
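
/*
 * Illustrative sketch, not part of the original source and not compiled:
 * calling vm_page_info() with a small fixed buffer. The return value is
 * the total number of hash buckets, so a real caller would use it to size
 * a (possibly pageable) buffer and retry if the first one was too small.
 * The helper name and the buffer size are hypothetical.
 */
#if 0 /* example only */
static void
example_dump_bucket_counts(void)
{
	hash_info_bucket_t info[16];
	unsigned int total, i;

	total = vm_page_info(info, 16);
	for (i = 0; i < 16 && i < total; i++)
		printf("bucket %u: %u pages\n", i, (unsigned int)info[i].hib_count);
}
#endif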
2509 #endif /* MACH_VM_DEBUG */
2510
2511 #include <mach_kdb.h>
2512 #if MACH_KDB
2513
2514 #include <ddb/db_output.h>
2515 #include <vm/vm_print.h>
2516 #define printf kdbprintf
2517
2518 /*
2519 * Routine: vm_page_print [exported]
2520 */
2521 void
2522 vm_page_print(
2523 db_addr_t db_addr)
2524 {
2525 vm_page_t p;
2526
2527 p = (vm_page_t) (long) db_addr;
2528
2529 iprintf("page 0x%x\n", p);
2530
2531 db_indent += 2;
2532
2533 iprintf("object=0x%x", p->object);
2534 printf(", offset=0x%x", p->offset);
2535 printf(", wire_count=%d", p->wire_count);
2536
2537 iprintf("%sinactive, %sactive, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
2538 (p->inactive ? "" : "!"),
2539 (p->active ? "" : "!"),
2540 (p->gobbled ? "" : "!"),
2541 (p->laundry ? "" : "!"),
2542 (p->free ? "" : "!"),
2543 (p->reference ? "" : "!"),
2544 (p->encrypted ? "" : "!"));
2545 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
2546 (p->busy ? "" : "!"),
2547 (p->wanted ? "" : "!"),
2548 (p->tabled ? "" : "!"),
2549 (p->fictitious ? "" : "!"),
2550 (p->private ? "" : "!"),
2551 (p->precious ? "" : "!"));
2552 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
2553 (p->absent ? "" : "!"),
2554 (p->error ? "" : "!"),
2555 (p->dirty ? "" : "!"),
2556 (p->cleaning ? "" : "!"),
2557 (p->pageout ? "" : "!"),
2558 (p->clustered ? "" : "!"));
2559 iprintf("%slock_supplied, %soverwriting, %srestart, %sunusual\n",
2560 (p->lock_supplied ? "" : "!"),
2561 (p->overwriting ? "" : "!"),
2562 (p->restart ? "" : "!"),
2563 (p->unusual ? "" : "!"));
2564
2565 iprintf("phys_page=0x%x", p->phys_page);
2566 printf(", page_error=0x%x", p->page_error);
2567 printf(", page_lock=0x%x", p->page_lock);
2568 printf(", unlock_request=%d\n", p->unlock_request);
2569
2570 db_indent -= 2;
2571 }
2572 #endif /* MACH_KDB */