1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <ppc/mappings.h> /* (BRINGUP) */
89 #include <pexpert/pexpert.h> /* (BRINGUP) */
90
91 #include <vm/vm_protos.h>
92 #include <vm/memory_object.h>
93 #include <vm/vm_purgeable_internal.h>
94
95 #include <IOKit/IOHibernatePrivate.h>
96
97
98 #if CONFIG_EMBEDDED
99 #include <sys/kern_memorystatus.h>
100 #endif
101
102 #include <sys/kdebug.h>
103
104 boolean_t vm_page_free_verify = TRUE;
105
106 int speculative_age_index = 0;
107 int speculative_steal_index = 0;
108 lck_mtx_ext_t vm_page_queue_lock_ext;
109 lck_mtx_ext_t vm_page_queue_free_lock_ext;
110 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
111
112 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
113
114
115 __private_extern__ void vm_page_init_lck_grp(void);
116
117 static void vm_page_free_prepare(vm_page_t page);
118
119
120
121 /*
122 * Associated with each page of user-allocatable memory is a
123 * page structure.
124 */
125
126 /*
127 * These variables record the values returned by vm_page_bootstrap,
128 * for debugging purposes. The implementation of pmap_steal_memory
129 * and pmap_startup here also uses them internally.
130 */
131
132 vm_offset_t virtual_space_start;
133 vm_offset_t virtual_space_end;
134 int vm_page_pages;
135
136 /*
137 * The vm_page_lookup() routine, which provides for fast
138 * (virtual memory object, offset) to page lookup, employs
139 * the following hash table. The vm_page_{insert,remove}
140 * routines install and remove associations in the table.
141 * [This table is often called the virtual-to-physical,
142 * or VP, table.]
143 */
144 typedef struct {
145 vm_page_t pages;
146 #if MACH_PAGE_HASH_STATS
147 int cur_count; /* current count */
148 int hi_count; /* high water mark */
149 #endif /* MACH_PAGE_HASH_STATS */
150 } vm_page_bucket_t;
151
152
153 #define BUCKETS_PER_LOCK 16
154
155 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
156 unsigned int vm_page_bucket_count = 0; /* How big is array? */
157 unsigned int vm_page_hash_mask; /* Mask for hash function */
158 unsigned int vm_page_hash_shift; /* Shift for hash function */
159 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
160 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
161
162 lck_spin_t *vm_page_bucket_locks;
163
164
165 #if MACH_PAGE_HASH_STATS
166 /* This routine is only for debugging. It is intended to be called by
167 * hand by a developer using a kernel debugger. This routine prints
168 * out vm_page_hash table statistics to the kernel debug console.
169 */
170 void
171 hash_debug(void)
172 {
173 int i;
174 int numbuckets = 0;
175 int highsum = 0;
176 int maxdepth = 0;
177
178 for (i = 0; i < vm_page_bucket_count; i++) {
179 if (vm_page_buckets[i].hi_count) {
180 numbuckets++;
181 highsum += vm_page_buckets[i].hi_count;
182 if (vm_page_buckets[i].hi_count > maxdepth)
183 maxdepth = vm_page_buckets[i].hi_count;
184 }
185 }
186 printf("Total number of buckets: %d\n", vm_page_bucket_count);
187 printf("Number used buckets: %d = %d%%\n",
188 numbuckets, 100*numbuckets/vm_page_bucket_count);
189 printf("Number unused buckets: %d = %d%%\n",
190 vm_page_bucket_count - numbuckets,
191 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
192 printf("Sum of bucket max depth: %d\n", highsum);
193 printf("Average bucket depth: %d.%2d\n",
194 highsum/vm_page_bucket_count,
195 highsum%vm_page_bucket_count);
196 printf("Maximum bucket depth: %d\n", maxdepth);
197 }
198 #endif /* MACH_PAGE_HASH_STATS */
199
200 /*
201 * The virtual page size is currently implemented as a runtime
202 * variable, but is constant once initialized using vm_set_page_size.
203 * This initialization must be done in the machine-dependent
204 * bootstrap sequence, before calling other machine-independent
205 * initializations.
206 *
207 * All references to the virtual page size outside this
208 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
209 * constants.
210 */
211 vm_size_t page_size = PAGE_SIZE;
212 vm_size_t page_mask = PAGE_MASK;
213 int page_shift = PAGE_SHIFT;
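
/*
 * A minimal sketch of how page_size, page_mask and page_shift relate once
 * vm_set_page_size() has run: with a 4K page, page_shift is 12 and
 * page_mask is 0xFFF, so page-boundary arithmetic is simple masking.
 * The example_* helpers are hypothetical names that merely mirror what the
 * trunc_page()/round_page() macros already provide.
 */
#if 0	/* illustrative example only */
static vm_offset_t
example_trunc_page(vm_offset_t addr)
{
	/* clear the offset-within-page bits */
	return (addr & ~((vm_offset_t) page_mask));
}

static vm_offset_t
example_round_page(vm_offset_t addr)
{
	/* round up to the next page boundary */
	return ((addr + page_mask) & ~((vm_offset_t) page_mask));
}
#endif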
214
215 /*
216 * Resident page structures are initialized from
217 * a template (see vm_page_alloc).
218 *
219 * When adding a new field to the virtual memory
220 * object structure, be sure to add initialization
221 * (see vm_page_bootstrap).
222 */
223 struct vm_page vm_page_template;
224
225 vm_page_t vm_pages = VM_PAGE_NULL;
226 unsigned int vm_pages_count = 0;
227
228 /*
229 * Resident pages that represent real memory
230 * are allocated from a set of free lists,
231 * one per color.
232 */
233 unsigned int vm_colors;
234 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
235 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
236 queue_head_t vm_page_queue_free[MAX_COLORS];
237 vm_page_t vm_page_queue_fictitious;
238 unsigned int vm_page_free_wanted;
239 unsigned int vm_page_free_wanted_privileged;
240 unsigned int vm_page_free_count;
241 unsigned int vm_page_fictitious_count;
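
/*
 * A minimal sketch of how a page's free-list color is chosen: the color is
 * taken from the low bits of the physical page number (vm_color_mask is
 * vm_colors - 1, and vm_colors is a power of 2), so physically consecutive
 * pages spread across the per-color free queues.  example_free_queue_color()
 * is a hypothetical helper; the same expression appears inline in
 * vm_page_release() below.
 */
#if 0	/* illustrative example only */
static unsigned int
example_free_queue_color(vm_page_t mem)
{
	return (mem->phys_page & vm_color_mask);
}
#endif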
242
243 unsigned int vm_page_free_count_minimum; /* debugging */
244
245 /*
246 * Occasionally, the virtual memory system uses
247 * resident page structures that do not refer to
248 * real pages, for example to leave a page with
249 * important state information in the VP table.
250 *
251 * These page structures are allocated the way
252 * most other kernel structures are.
253 */
254 zone_t vm_page_zone;
255 vm_locks_array_t vm_page_locks;
256 decl_lck_mtx_data(,vm_page_alloc_lock)
257 unsigned int io_throttle_zero_fill;
258
259 unsigned int vm_page_local_q_count = 0;
260 unsigned int vm_page_local_q_soft_limit = 250;
261 unsigned int vm_page_local_q_hard_limit = 500;
262 struct vplq *vm_page_local_q = NULL;
263
264 /*
265 * Fictitious pages don't have a physical address,
266 * but we must initialize phys_page to something.
267 * For debugging, this should be a strange value
268 * that the pmap module can recognize in assertions.
269 */
270 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
271
272 /*
273 * Guard pages are not accessible so they don't
274 * need a physical address, but we need to enter
275 * one in the pmap.
276 * Let's make it recognizable and make sure that
277 * we don't use a real physical page with that
278 * physical address.
279 */
280 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
281
282 /*
283 * Resident page structures are also chained on
284 * queues that are used by the page replacement
285 * system (pageout daemon). These queues are
286 * defined here, but are shared by the pageout
287 * module. The inactive queue is broken into
288 * inactive and zf for convenience as the
289 * pageout daemon often assigns a higher
290 * affinity to zf pages.
291 */
292 queue_head_t vm_page_queue_active;
293 queue_head_t vm_page_queue_inactive;
294 queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
295 queue_head_t vm_page_queue_throttled;
296
297 unsigned int vm_page_active_count;
298 unsigned int vm_page_inactive_count;
299 unsigned int vm_page_throttled_count;
300 unsigned int vm_page_speculative_count;
301 unsigned int vm_page_wire_count;
302 unsigned int vm_page_gobble_count = 0;
303 unsigned int vm_page_wire_count_warning = 0;
304 unsigned int vm_page_gobble_count_warning = 0;
305
306 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
307 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
308 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
309
310 #if DEVELOPMENT || DEBUG
311 unsigned int vm_page_speculative_recreated = 0;
312 unsigned int vm_page_speculative_created = 0;
313 unsigned int vm_page_speculative_used = 0;
314 #endif
315
316 ppnum_t vm_lopage_poolstart = 0;
317 ppnum_t vm_lopage_poolend = 0;
318 int vm_lopage_poolsize = 0;
319 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
320
321
322 /*
323 * Several page replacement parameters are also
324 * shared with this module, so that page allocation
325 * (done here in vm_page_alloc) can trigger the
326 * pageout daemon.
327 */
328 unsigned int vm_page_free_target = 0;
329 unsigned int vm_page_free_min = 0;
330 unsigned int vm_page_throttle_limit = 0;
331 uint32_t vm_page_creation_throttle = 0;
332 unsigned int vm_page_inactive_target = 0;
333 unsigned int vm_page_inactive_min = 0;
334 unsigned int vm_page_free_reserved = 0;
335 unsigned int vm_page_throttle_count = 0;
336
337 /*
338 * The VM system has a couple of heuristics for deciding
339 * that pages are "uninteresting" and should be placed
340 * on the inactive queue as likely candidates for replacement.
341 * These variables let the heuristics be controlled at run-time
342 * to make experimentation easier.
343 */
344
345 boolean_t vm_page_deactivate_hint = TRUE;
346
347 struct vm_page_stats_reusable vm_page_stats_reusable;
348
349 /*
350 * vm_set_page_size:
351 *
352 * Sets the page size, perhaps based upon the memory
353 * size. Must be called before any use of page-size
354 * dependent functions.
355 *
356 * Sets page_shift and page_mask from page_size.
357 */
358 void
359 vm_set_page_size(void)
360 {
361 page_mask = page_size - 1;
362
363 if ((page_mask & page_size) != 0)
364 panic("vm_set_page_size: page size not a power of two");
365
366 for (page_shift = 0; ; page_shift++)
367 if ((1U << page_shift) == page_size)
368 break;
369 }
370
371
372 /* Called once during startup, once the cache geometry is known.
373 */
374 static void
375 vm_page_set_colors( void )
376 {
377 unsigned int n, override;
378
379 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
380 n = override;
381 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
382 n = vm_cache_geometry_colors;
383 else n = DEFAULT_COLORS; /* use default if all else fails */
384
385 if ( n == 0 )
386 n = 1;
387 if ( n > MAX_COLORS )
388 n = MAX_COLORS;
389
390 /* the count must be a power of 2 */
391 if ( ( n & (n - 1)) != 0 )
392 panic("vm_page_set_colors");
393
394 vm_colors = n;
395 vm_color_mask = n - 1;
396 }
397
398
399 lck_grp_t vm_page_lck_grp_free;
400 lck_grp_t vm_page_lck_grp_queue;
401 lck_grp_t vm_page_lck_grp_local;
402 lck_grp_t vm_page_lck_grp_purge;
403 lck_grp_t vm_page_lck_grp_alloc;
404 lck_grp_t vm_page_lck_grp_bucket;
405 lck_grp_attr_t vm_page_lck_grp_attr;
406 lck_attr_t vm_page_lck_attr;
407
408
409 __private_extern__ void
410 vm_page_init_lck_grp(void)
411 {
412 /*
413 * initialize the vm_page lock world
414 */
415 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
416 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
417 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
418 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
419 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
420 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
421 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
422 lck_attr_setdefault(&vm_page_lck_attr);
423 }
424
425 void
426 vm_page_init_local_q()
427 {
428 unsigned int num_cpus;
429 unsigned int i;
430 struct vplq *t_local_q;
431
432 num_cpus = ml_get_max_cpus();
433
434 /*
435 * no point in this for a uni-processor system
436 */
437 if (num_cpus >= 2) {
438 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
439
440 for (i = 0; i < num_cpus; i++) {
441 struct vpl *lq;
442
443 lq = &t_local_q[i].vpl_un.vpl;
444 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
445 queue_init(&lq->vpl_queue);
446 lq->vpl_count = 0;
447 }
448 vm_page_local_q_count = num_cpus;
449
450 vm_page_local_q = (struct vplq *)t_local_q;
451 }
452 }
453
454
455 /*
456 * vm_page_bootstrap:
457 *
458 * Initializes the resident memory module.
459 *
460 * Allocates memory for the page cells, and
461 * for the object/offset-to-page hash table headers.
462 * Each page cell is initialized and placed on the free list.
463 * Returns the range of available kernel virtual memory.
464 */
465
466 void
467 vm_page_bootstrap(
468 vm_offset_t *startp,
469 vm_offset_t *endp)
470 {
471 register vm_page_t m;
472 unsigned int i;
473 unsigned int log1;
474 unsigned int log2;
475 unsigned int size;
476
477 /*
478 * Initialize the vm_page template.
479 */
480
481 m = &vm_page_template;
482 bzero(m, sizeof (*m));
483
484 m->pageq.next = NULL;
485 m->pageq.prev = NULL;
486 m->listq.next = NULL;
487 m->listq.prev = NULL;
488 m->next = VM_PAGE_NULL;
489
490 m->object = VM_OBJECT_NULL; /* reset later */
491 m->offset = (vm_object_offset_t) -1; /* reset later */
492
493 m->wire_count = 0;
494 m->local = FALSE;
495 m->inactive = FALSE;
496 m->active = FALSE;
497 m->pageout_queue = FALSE;
498 m->speculative = FALSE;
499 m->laundry = FALSE;
500 m->free = FALSE;
501 m->reference = FALSE;
502 m->gobbled = FALSE;
503 m->private = FALSE;
504 m->throttled = FALSE;
505 m->__unused_pageq_bits = 0;
506
507 m->phys_page = 0; /* reset later */
508
509 m->busy = TRUE;
510 m->wanted = FALSE;
511 m->tabled = FALSE;
512 m->fictitious = FALSE;
513 m->pmapped = FALSE;
514 m->wpmapped = FALSE;
515 m->pageout = FALSE;
516 m->absent = FALSE;
517 m->error = FALSE;
518 m->dirty = FALSE;
519 m->cleaning = FALSE;
520 m->precious = FALSE;
521 m->clustered = FALSE;
522 m->overwriting = FALSE;
523 m->restart = FALSE;
524 m->unusual = FALSE;
525 m->encrypted = FALSE;
526 m->encrypted_cleaning = FALSE;
527 m->list_req_pending = FALSE;
528 m->dump_cleaning = FALSE;
529 m->cs_validated = FALSE;
530 m->cs_tainted = FALSE;
531 m->no_cache = FALSE;
532 m->zero_fill = FALSE;
533 m->reusable = FALSE;
534 m->__unused_object_bits = 0;
535
536
537 /*
538 * Initialize the page queues.
539 */
540 vm_page_init_lck_grp();
541
542 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
543 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
544 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
545
546 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
547 int group;
548
549 purgeable_queues[i].token_q_head = 0;
550 purgeable_queues[i].token_q_tail = 0;
551 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
552 queue_init(&purgeable_queues[i].objq[group]);
553
554 purgeable_queues[i].type = i;
555 purgeable_queues[i].new_pages = 0;
556 #if MACH_ASSERT
557 purgeable_queues[i].debug_count_tokens = 0;
558 purgeable_queues[i].debug_count_objects = 0;
559 #endif
560 };
561
562 for (i = 0; i < MAX_COLORS; i++ )
563 queue_init(&vm_page_queue_free[i]);
564 queue_init(&vm_lopage_queue_free);
565 vm_page_queue_fictitious = VM_PAGE_NULL;
566 queue_init(&vm_page_queue_active);
567 queue_init(&vm_page_queue_inactive);
568 queue_init(&vm_page_queue_throttled);
569 queue_init(&vm_page_queue_zf);
570
571 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
572 queue_init(&vm_page_queue_speculative[i].age_q);
573
574 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
575 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
576 }
577 vm_page_free_wanted = 0;
578 vm_page_free_wanted_privileged = 0;
579
580 vm_page_set_colors();
581
582
583 /*
584 * Steal memory for the map and zone subsystems.
585 */
586
587 vm_map_steal_memory();
588 zone_steal_memory();
589
590 /*
591 * Allocate (and initialize) the virtual-to-physical
592 * table hash buckets.
593 *
594 * The number of buckets should be a power of two to
595 * get a good hash function. The following computation
596 * chooses the first power of two that is greater
597 * than the number of physical pages in the system.
598 */
599
600 if (vm_page_bucket_count == 0) {
601 unsigned int npages = pmap_free_pages();
602
603 vm_page_bucket_count = 1;
604 while (vm_page_bucket_count < npages)
605 vm_page_bucket_count <<= 1;
606 }
607 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
608
609 vm_page_hash_mask = vm_page_bucket_count - 1;
610
611 /*
612 * Calculate object shift value for hashing algorithm:
613 * O = log2(sizeof(struct vm_object))
614 * B = log2(vm_page_bucket_count)
615 * hash shifts the object left by
616 * B/2 - O
617 */
618 size = vm_page_bucket_count;
619 for (log1 = 0; size > 1; log1++)
620 size /= 2;
621 size = sizeof(struct vm_object);
622 for (log2 = 0; size > 1; log2++)
623 size /= 2;
624 vm_page_hash_shift = log1/2 - log2 + 1;
625
626 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
627 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
628 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
629
630 if (vm_page_hash_mask & vm_page_bucket_count)
631 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
632
633 vm_page_buckets = (vm_page_bucket_t *)
634 pmap_steal_memory(vm_page_bucket_count *
635 sizeof(vm_page_bucket_t));
636
637 vm_page_bucket_locks = (lck_spin_t *)
638 pmap_steal_memory(vm_page_bucket_lock_count *
639 sizeof(lck_spin_t));
640
641 for (i = 0; i < vm_page_bucket_count; i++) {
642 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
643
644 bucket->pages = VM_PAGE_NULL;
645 #if MACH_PAGE_HASH_STATS
646 bucket->cur_count = 0;
647 bucket->hi_count = 0;
648 #endif /* MACH_PAGE_HASH_STATS */
649 }
650
651 for (i = 0; i < vm_page_bucket_lock_count; i++)
652 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
653
654 /*
655 * Machine-dependent code allocates the resident page table.
656 * It uses vm_page_init to initialize the page frames.
657 * The code also returns to us the virtual space available
658 * to the kernel. We don't trust the pmap module
659 * to get the alignment right.
660 */
661
662 pmap_startup(&virtual_space_start, &virtual_space_end);
663 virtual_space_start = round_page(virtual_space_start);
664 virtual_space_end = trunc_page(virtual_space_end);
665
666 *startp = virtual_space_start;
667 *endp = virtual_space_end;
668
669 /*
670 * Compute the initial "wire" count.
671 * Up until now, the pages which have been set aside are not under
672 * the VM system's control, so although they aren't explicitly
673 * wired, they nonetheless can't be moved. At this moment,
674 * all VM managed pages are "free", courtesy of pmap_startup.
675 */
676 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
677 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count; /* initial value */
678 vm_page_free_count_minimum = vm_page_free_count;
679
680 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
681 vm_page_free_count, vm_page_wire_count);
682
683 simple_lock_init(&vm_paging_lock, 0);
684 }
685
686 #ifndef MACHINE_PAGES
687 /*
688 * We implement pmap_steal_memory and pmap_startup with the help
689 * of two simpler functions, pmap_virtual_space and pmap_next_page.
690 */
691
692 void *
693 pmap_steal_memory(
694 vm_size_t size)
695 {
696 vm_offset_t addr, vaddr;
697 ppnum_t phys_page;
698
699 /*
700 * We round the size up to an integral multiple of the pointer size.
701 */
702
703 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
704
705 /*
706 * If this is the first call to pmap_steal_memory,
707 * we have to initialize ourself.
708 */
709
710 if (virtual_space_start == virtual_space_end) {
711 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
712
713 /*
714 * The initial values must be aligned properly, and
715 * we don't trust the pmap module to do it right.
716 */
717
718 virtual_space_start = round_page(virtual_space_start);
719 virtual_space_end = trunc_page(virtual_space_end);
720 }
721
722 /*
723 * Allocate virtual memory for this request.
724 */
725
726 addr = virtual_space_start;
727 virtual_space_start += size;
728
729 kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
730
731 /*
732 * Allocate and map physical pages to back new virtual pages.
733 */
734
735 for (vaddr = round_page(addr);
736 vaddr < addr + size;
737 vaddr += PAGE_SIZE) {
738 #if defined(__LP64__)
739 if (!pmap_next_page_k64(&phys_page))
740 #else
741 if (!pmap_next_page(&phys_page))
742 #endif
743
744 panic("pmap_steal_memory");
745
746 /*
747 * XXX Logically, these mappings should be wired,
748 * but some pmap modules barf if they are.
749 */
750 #if defined(__LP64__)
751 pmap_pre_expand(kernel_pmap, vaddr);
752 #endif
753
754 pmap_enter(kernel_pmap, vaddr, phys_page,
755 VM_PROT_READ|VM_PROT_WRITE,
756 VM_WIMG_USE_DEFAULT, FALSE);
757 /*
758 * Account for newly stolen memory
759 */
760 vm_page_wire_count++;
761
762 }
763
764 return (void *) addr;
765 }
766
767 void
768 pmap_startup(
769 vm_offset_t *startp,
770 vm_offset_t *endp)
771 {
772 unsigned int i, npages, pages_initialized, fill, fillval;
773 ppnum_t phys_page;
774 addr64_t tmpaddr;
775 unsigned int num_of_lopages = 0;
776 unsigned int last_index;
777
778 /*
779 * We calculate how many page frames we will have
780 * and then allocate the page structures in one chunk.
781 */
782
783 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
784 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
785 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many pages will fit, counting the space needed for the vm_page_ts themselves */
786
787 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
788
789 /*
790 * Initialize the page frames.
791 */
792 for (i = 0, pages_initialized = 0; i < npages; i++) {
793 if (!pmap_next_page(&phys_page))
794 break;
795
796 vm_page_init(&vm_pages[i], phys_page);
797 vm_page_pages++;
798 pages_initialized++;
799 }
800 vm_pages_count = pages_initialized;
801
802 /*
803 * Check if we want to initialize pages to a known value
804 */
805 fill = 0; /* Assume no fill */
806 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
807
808
809 /*
810 * if vm_lopage_poolsize is non-zero, then we need to reserve
811 * a pool of pages whose addresses are less than 4G... this pool
812 * is used by drivers whose hardware can't DMA beyond 32 bits...
813 *
814 * note that I'm assuming that the page list is ascending and
815 * ordered with respect to the physical address
816 */
817 for (i = 0, num_of_lopages = vm_lopage_poolsize; num_of_lopages && i < pages_initialized; num_of_lopages--, i++) {
818 vm_page_t m;
819
820 m = &vm_pages[i];
821
822 if (m->phys_page >= (1 << (32 - PAGE_SHIFT)))
823 panic("couldn't reserve the lopage pool: not enough lo pages\n");
824
825 if (m->phys_page < vm_lopage_poolend)
826 panic("couldn't reserve the lopage pool: page list out of order\n");
827
828 vm_lopage_poolend = m->phys_page;
829
830 if (vm_lopage_poolstart == 0)
831 vm_lopage_poolstart = m->phys_page;
832 else {
833 if (m->phys_page < vm_lopage_poolstart)
834 panic("couldn't reserve the lopage pool: page list out of order\n");
835 }
836
837 if (fill)
838 fillPage(m->phys_page, fillval); /* Fill the page with a known value if requested at boot */
839
840 vm_page_release(m);
841 }
842 last_index = i;
843
844 // -debug code remove
845 if (2 == vm_himemory_mode) {
846 // free low -> high so high is preferred
847 for (i = last_index + 1; i <= pages_initialized; i++) {
848 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
849 vm_page_release(&vm_pages[i - 1]);
850 }
851 }
852 else
853 // debug code remove-
854
855 /*
856 * Release pages in reverse order so that physical pages
857 * initially get allocated in ascending addresses. This keeps
858 * the devices (which must address physical memory) happy if
859 * they require several consecutive pages.
860 */
861 for (i = pages_initialized; i > last_index; i--) {
862 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
863 vm_page_release(&vm_pages[i - 1]);
864 }
865
866 #if 0
867 {
868 vm_page_t xx, xxo, xxl;
869 int i, j, k, l;
870
871 j = 0; /* (BRINGUP) */
872 xxl = 0;
873
874 for( i = 0; i < vm_colors; i++ ) {
875 queue_iterate(&vm_page_queue_free[i],
876 xx,
877 vm_page_t,
878 pageq) { /* BRINGUP */
879 j++; /* (BRINGUP) */
880 if(j > vm_page_free_count) { /* (BRINGUP) */
881 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
882 }
883
884 l = vm_page_free_count - j; /* (BRINGUP) */
885 k = 0; /* (BRINGUP) */
886
887 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
888
889 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
890 k++;
891 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
892 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
893 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
894 }
895 }
896
897 xxl = xx;
898 }
899 }
900
901 if(j != vm_page_free_count) { /* (BRINGUP) */
902 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
903 }
904 }
905 #endif
906
907
908 /*
909 * We have to re-align virtual_space_start,
910 * because pmap_steal_memory has been using it.
911 */
912
913 virtual_space_start = round_page(virtual_space_start);
914
915 *startp = virtual_space_start;
916 *endp = virtual_space_end;
917 }
918 #endif /* MACHINE_PAGES */
919
920 /*
921 * Routine: vm_page_module_init
922 * Purpose:
923 * Second initialization pass, to be done after
924 * the basic VM system is ready.
925 */
926 void
927 vm_page_module_init(void)
928 {
929 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
930 0, PAGE_SIZE, "vm pages");
931
932 #if ZONE_DEBUG
933 zone_debug_disable(vm_page_zone);
934 #endif /* ZONE_DEBUG */
935
936 zone_change(vm_page_zone, Z_EXPAND, FALSE);
937 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
938 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
939
940 /*
941 * Adjust zone statistics to account for the real pages allocated
942 * in vm_page_create(). [Q: is this really what we want?]
943 */
944 vm_page_zone->count += vm_page_pages;
945 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
946
947 lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
948 }
949
950 /*
951 * Routine: vm_page_create
952 * Purpose:
953 * After the VM system is up, machine-dependent code
954 * may stumble across more physical memory, for example
955 * memory that it was reserving for a frame buffer.
956 * vm_page_create turns this memory into available pages.
957 */
958
959 void
960 vm_page_create(
961 ppnum_t start,
962 ppnum_t end)
963 {
964 ppnum_t phys_page;
965 vm_page_t m;
966
967 for (phys_page = start;
968 phys_page < end;
969 phys_page++) {
970 while ((m = (vm_page_t) vm_page_grab_fictitious())
971 == VM_PAGE_NULL)
972 vm_page_more_fictitious();
973
974 vm_page_init(m, phys_page);
975 vm_page_pages++;
976 vm_page_release(m);
977 }
978 }
979
980 /*
981 * vm_page_hash:
982 *
983 * Distributes the object/offset key pair among hash buckets.
984 *
985 * NOTE: The bucket count must be a power of 2
986 */
987 #define vm_page_hash(object, offset) (\
988 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
989 & vm_page_hash_mask)
990
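
/*
 * A minimal usage sketch for vm_page_hash(): the pattern used throughout
 * this file to go from an (object, offset) pair to its hash bucket and to
 * the spin lock that covers it (one lock per BUCKETS_PER_LOCK buckets).
 * example_bucket_for() is a hypothetical helper name.
 */
#if 0	/* illustrative example only */
static vm_page_bucket_t *
example_bucket_for(
	vm_object_t		object,
	vm_object_offset_t	offset,
	lck_spin_t		**bucket_lockp)
{
	int	hash_id = vm_page_hash(object, offset);

	*bucket_lockp = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
	return (&vm_page_buckets[hash_id]);
}
#endif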
991
992 /*
993 * vm_page_insert: [ internal use only ]
994 *
995 * Inserts the given mem entry into the object/offset-page
996 * table and object list.
997 *
998 * The object must be locked.
999 */
1000 void
1001 vm_page_insert(
1002 vm_page_t mem,
1003 vm_object_t object,
1004 vm_object_offset_t offset)
1005 {
1006 vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
1007 }
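
/*
 * Caller-side sketch: vm_page_insert() asserts that the object is locked
 * exclusively, so a typical caller brackets the insertion with the object
 * lock.  This assumes the vm_object_lock()/vm_object_unlock() interfaces
 * declared elsewhere in the VM code; example_insert_page() is a
 * hypothetical helper name.
 */
#if 0	/* illustrative example only */
static void
example_insert_page(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_object_lock(object);
	vm_page_insert(mem, object, offset);
	vm_object_unlock(object);
}
#endif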
1008
1009 void
1010 vm_page_insert_internal(
1011 vm_page_t mem,
1012 vm_object_t object,
1013 vm_object_offset_t offset,
1014 boolean_t queues_lock_held,
1015 boolean_t insert_in_hash)
1016 {
1017 vm_page_bucket_t *bucket;
1018 lck_spin_t *bucket_lock;
1019 int hash_id;
1020
1021 XPR(XPR_VM_PAGE,
1022 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1023 object, offset, mem, 0,0);
1024
1025 VM_PAGE_CHECK(mem);
1026
1027 if (object == vm_submap_object) {
1028 /* the vm_submap_object is only a placeholder for submaps */
1029 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1030 }
1031
1032 vm_object_lock_assert_exclusive(object);
1033 #if DEBUG
1034 lck_mtx_assert(&vm_page_queue_lock,
1035 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1036 : LCK_MTX_ASSERT_NOTOWNED);
1037 #endif /* DEBUG */
1038
1039 if (insert_in_hash == TRUE) {
1040 #if DEBUG
1041 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1042 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1043 "already in (obj=%p,off=0x%llx)",
1044 mem, object, offset, mem->object, mem->offset);
1045 #endif
1046 assert(!object->internal || offset < object->size);
1047
1048 /* only insert "pageout" pages into "pageout" objects,
1049 * and normal pages into normal objects */
1050 assert(object->pageout == mem->pageout);
1051
1052 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1053
1054 /*
1055 * Record the object/offset pair in this page
1056 */
1057
1058 mem->object = object;
1059 mem->offset = offset;
1060
1061 /*
1062 * Insert it into the object/offset hash table
1063 */
1064 hash_id = vm_page_hash(object, offset);
1065 bucket = &vm_page_buckets[hash_id];
1066 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1067
1068 lck_spin_lock(bucket_lock);
1069
1070 mem->next = bucket->pages;
1071 bucket->pages = mem;
1072 #if MACH_PAGE_HASH_STATS
1073 if (++bucket->cur_count > bucket->hi_count)
1074 bucket->hi_count = bucket->cur_count;
1075 #endif /* MACH_PAGE_HASH_STATS */
1076
1077 lck_spin_unlock(bucket_lock);
1078 }
1079 /*
1080 * Now link into the object's list of backed pages.
1081 */
1082
1083 VM_PAGE_INSERT(mem, object);
1084 mem->tabled = TRUE;
1085
1086 /*
1087 * Show that the object has one more resident page.
1088 */
1089
1090 object->resident_page_count++;
1091 if (VM_PAGE_WIRED(mem)) {
1092 object->wired_page_count++;
1093 }
1094 assert(object->resident_page_count >= object->wired_page_count);
1095
1096 assert(!mem->reusable);
1097
1098 if (object->purgable == VM_PURGABLE_VOLATILE) {
1099 if (VM_PAGE_WIRED(mem)) {
1100 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1101 } else {
1102 OSAddAtomic(1, &vm_page_purgeable_count);
1103 }
1104 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1105 mem->throttled) {
1106 /*
1107 * This page belongs to a purged VM object but hasn't
1108 * been purged (because it was "busy").
1109 * It's in the "throttled" queue and hence not
1110 * visible to vm_pageout_scan(). Move it to a pageable
1111 * queue, so that it can eventually be reclaimed, instead
1112 * of lingering in the "empty" object.
1113 */
1114 if (queues_lock_held == FALSE)
1115 vm_page_lockspin_queues();
1116 vm_page_deactivate(mem);
1117 if (queues_lock_held == FALSE)
1118 vm_page_unlock_queues();
1119 }
1120 }
1121
1122 /*
1123 * vm_page_replace:
1124 *
1125 * Exactly like vm_page_insert, except that we first
1126 * remove any existing page at the given offset in object.
1127 *
1128 * The object must be locked.
1129 */
1130 void
1131 vm_page_replace(
1132 register vm_page_t mem,
1133 register vm_object_t object,
1134 register vm_object_offset_t offset)
1135 {
1136 vm_page_bucket_t *bucket;
1137 vm_page_t found_m = VM_PAGE_NULL;
1138 lck_spin_t *bucket_lock;
1139 int hash_id;
1140
1141 VM_PAGE_CHECK(mem);
1142 vm_object_lock_assert_exclusive(object);
1143 #if DEBUG
1144 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1145 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1146 "already in (obj=%p,off=0x%llx)",
1147 mem, object, offset, mem->object, mem->offset);
1148 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1149 #endif
1150 /*
1151 * Record the object/offset pair in this page
1152 */
1153
1154 mem->object = object;
1155 mem->offset = offset;
1156
1157 /*
1158 * Insert it into the object/offset hash table,
1159 * replacing any page that might have been there.
1160 */
1161
1162 hash_id = vm_page_hash(object, offset);
1163 bucket = &vm_page_buckets[hash_id];
1164 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1165
1166 lck_spin_lock(bucket_lock);
1167
1168 if (bucket->pages) {
1169 vm_page_t *mp = &bucket->pages;
1170 vm_page_t m = *mp;
1171
1172 do {
1173 if (m->object == object && m->offset == offset) {
1174 /*
1175 * Remove old page from hash list
1176 */
1177 *mp = m->next;
1178
1179 found_m = m;
1180 break;
1181 }
1182 mp = &m->next;
1183 } while ((m = *mp));
1184
1185 mem->next = bucket->pages;
1186 } else {
1187 mem->next = VM_PAGE_NULL;
1188 }
1189 /*
1190 * insert new page at head of hash list
1191 */
1192 bucket->pages = mem;
1193
1194 lck_spin_unlock(bucket_lock);
1195
1196 if (found_m) {
1197 /*
1198 * there was already a page at the specified
1199 * offset for this object... remove it from
1200 * the object and free it back to the free list
1201 */
1202 vm_page_free_unlocked(found_m, FALSE);
1203 }
1204 vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
1205 }
1206
1207 /*
1208 * vm_page_remove: [ internal use only ]
1209 *
1210 * Removes the given mem entry from the object/offset-page
1211 * table and the object page list.
1212 *
1213 * The object must be locked.
1214 */
1215
1216 void
1217 vm_page_remove(
1218 vm_page_t mem,
1219 boolean_t remove_from_hash)
1220 {
1221 vm_page_bucket_t *bucket;
1222 vm_page_t this;
1223 lck_spin_t *bucket_lock;
1224 int hash_id;
1225
1226 XPR(XPR_VM_PAGE,
1227 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1228 mem->object, mem->offset,
1229 mem, 0,0);
1230
1231 vm_object_lock_assert_exclusive(mem->object);
1232 assert(mem->tabled);
1233 assert(!mem->cleaning);
1234 VM_PAGE_CHECK(mem);
1235
1236 if (remove_from_hash == TRUE) {
1237 /*
1238 * Remove from the object/offset hash table
1239 */
1240 hash_id = vm_page_hash(mem->object, mem->offset);
1241 bucket = &vm_page_buckets[hash_id];
1242 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1243
1244 lck_spin_lock(bucket_lock);
1245
1246 if ((this = bucket->pages) == mem) {
1247 /* optimize for common case */
1248
1249 bucket->pages = mem->next;
1250 } else {
1251 vm_page_t *prev;
1252
1253 for (prev = &this->next;
1254 (this = *prev) != mem;
1255 prev = &this->next)
1256 continue;
1257 *prev = this->next;
1258 }
1259 #if MACH_PAGE_HASH_STATS
1260 bucket->cur_count--;
1261 #endif /* MACH_PAGE_HASH_STATS */
1262
1263 lck_spin_unlock(bucket_lock);
1264 }
1265 /*
1266 * Now remove from the object's list of backed pages.
1267 */
1268
1269 VM_PAGE_REMOVE(mem);
1270
1271 /*
1272 * And show that the object has one fewer resident
1273 * page.
1274 */
1275
1276 assert(mem->object->resident_page_count > 0);
1277 mem->object->resident_page_count--;
1278 if (VM_PAGE_WIRED(mem)) {
1279 assert(mem->object->wired_page_count > 0);
1280 mem->object->wired_page_count--;
1281 }
1282 assert(mem->object->resident_page_count >=
1283 mem->object->wired_page_count);
1284 if (mem->reusable) {
1285 assert(mem->object->reusable_page_count > 0);
1286 mem->object->reusable_page_count--;
1287 assert(mem->object->reusable_page_count <=
1288 mem->object->resident_page_count);
1289 mem->reusable = FALSE;
1290 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1291 vm_page_stats_reusable.reused_remove++;
1292 } else if (mem->object->all_reusable) {
1293 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1294 vm_page_stats_reusable.reused_remove++;
1295 }
1296
1297 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1298 if (VM_PAGE_WIRED(mem)) {
1299 assert(vm_page_purgeable_wired_count > 0);
1300 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1301 } else {
1302 assert(vm_page_purgeable_count > 0);
1303 OSAddAtomic(-1, &vm_page_purgeable_count);
1304 }
1305 }
1306 mem->tabled = FALSE;
1307 mem->object = VM_OBJECT_NULL;
1308 mem->offset = (vm_object_offset_t) -1;
1309 }
1310
1311
1312 /*
1313 * vm_page_lookup:
1314 *
1315 * Returns the page associated with the object/offset
1316 * pair specified; if none is found, VM_PAGE_NULL is returned.
1317 *
1318 * The object must be locked. No side effects.
1319 */
1320
1321 unsigned long vm_page_lookup_hint = 0;
1322 unsigned long vm_page_lookup_hint_next = 0;
1323 unsigned long vm_page_lookup_hint_prev = 0;
1324 unsigned long vm_page_lookup_hint_miss = 0;
1325 unsigned long vm_page_lookup_bucket_NULL = 0;
1326 unsigned long vm_page_lookup_miss = 0;
1327
1328
1329 vm_page_t
1330 vm_page_lookup(
1331 vm_object_t object,
1332 vm_object_offset_t offset)
1333 {
1334 vm_page_t mem;
1335 vm_page_bucket_t *bucket;
1336 queue_entry_t qe;
1337 lck_spin_t *bucket_lock;
1338 int hash_id;
1339
1340 vm_object_lock_assert_held(object);
1341 mem = object->memq_hint;
1342
1343 if (mem != VM_PAGE_NULL) {
1344 assert(mem->object == object);
1345
1346 if (mem->offset == offset) {
1347 vm_page_lookup_hint++;
1348 return mem;
1349 }
1350 qe = queue_next(&mem->listq);
1351
1352 if (! queue_end(&object->memq, qe)) {
1353 vm_page_t next_page;
1354
1355 next_page = (vm_page_t) qe;
1356 assert(next_page->object == object);
1357
1358 if (next_page->offset == offset) {
1359 vm_page_lookup_hint_next++;
1360 object->memq_hint = next_page; /* new hint */
1361 return next_page;
1362 }
1363 }
1364 qe = queue_prev(&mem->listq);
1365
1366 if (! queue_end(&object->memq, qe)) {
1367 vm_page_t prev_page;
1368
1369 prev_page = (vm_page_t) qe;
1370 assert(prev_page->object == object);
1371
1372 if (prev_page->offset == offset) {
1373 vm_page_lookup_hint_prev++;
1374 object->memq_hint = prev_page; /* new hint */
1375 return prev_page;
1376 }
1377 }
1378 }
1379 /*
1380 * Search the hash table for this object/offset pair
1381 */
1382 hash_id = vm_page_hash(object, offset);
1383 bucket = &vm_page_buckets[hash_id];
1384
1385 /*
1386 * since we hold the object lock, we are guaranteed that no
1387 * new pages can be inserted into this object... this in turn
1388 * guarantees that the page we're looking for can't exist
1389 * if the bucket it hashes to is currently NULL even when looked
1390 * at outside the scope of the hash bucket lock... this is a
1391 * really cheap optimization to avoid taking the lock
1392 */
1393 if (bucket->pages == VM_PAGE_NULL) {
1394 vm_page_lookup_bucket_NULL++;
1395
1396 return (VM_PAGE_NULL);
1397 }
1398 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1399
1400 lck_spin_lock(bucket_lock);
1401
1402 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1403 VM_PAGE_CHECK(mem);
1404 if ((mem->object == object) && (mem->offset == offset))
1405 break;
1406 }
1407 lck_spin_unlock(bucket_lock);
1408
1409 if (mem != VM_PAGE_NULL) {
1410 if (object->memq_hint != VM_PAGE_NULL) {
1411 vm_page_lookup_hint_miss++;
1412 }
1413 assert(mem->object == object);
1414 object->memq_hint = mem;
1415 } else
1416 vm_page_lookup_miss++;
1417
1418 return(mem);
1419 }
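
/*
 * Caller-side sketch: vm_page_lookup() requires the object lock and has no
 * side effects, so a simple residency check looks like the helper below.
 * This assumes the vm_object_lock()/vm_object_unlock() interfaces declared
 * elsewhere in the VM code; example_page_is_resident() is a hypothetical
 * name, and the answer is only stable while the object lock is held.
 */
#if 0	/* illustrative example only */
static boolean_t
example_page_is_resident(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	vm_object_lock(object);
	m = vm_page_lookup(object, offset);
	vm_object_unlock(object);

	return (m != VM_PAGE_NULL);
}
#endif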
1420
1421
1422 /*
1423 * vm_page_rename:
1424 *
1425 * Move the given memory entry from its
1426 * current object to the specified target object/offset.
1427 *
1428 * The object must be locked.
1429 */
1430 void
1431 vm_page_rename(
1432 register vm_page_t mem,
1433 register vm_object_t new_object,
1434 vm_object_offset_t new_offset,
1435 boolean_t encrypted_ok)
1436 {
1437 assert(mem->object != new_object);
1438
1439 /*
1440 * ENCRYPTED SWAP:
1441 * The encryption key is based on the page's memory object
1442 * (aka "pager") and paging offset. Moving the page to
1443 * another VM object changes its "pager" and "paging_offset"
1444 * so it has to be decrypted first, or we would lose the key.
1445 *
1446 * One exception is VM object collapsing, where we transfer pages
1447 * from one backing object to its parent object. This operation also
1448 * transfers the paging information, so the <pager,paging_offset> info
1449 * should remain consistent. The caller (vm_object_do_collapse())
1450 * sets "encrypted_ok" in this case.
1451 */
1452 if (!encrypted_ok && mem->encrypted) {
1453 panic("vm_page_rename: page %p is encrypted\n", mem);
1454 }
1455
1456 XPR(XPR_VM_PAGE,
1457 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1458 new_object, new_offset,
1459 mem, 0,0);
1460
1461 /*
1462 * Changes to mem->object require the page lock because
1463 * the pageout daemon uses that lock to get the object.
1464 */
1465 vm_page_lockspin_queues();
1466
1467 vm_page_remove(mem, TRUE);
1468 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);
1469
1470 vm_page_unlock_queues();
1471 }
1472
1473 /*
1474 * vm_page_init:
1475 *
1476 * Initialize the fields in a new page.
1477 * This takes a structure with random values and initializes it
1478 * so that it can be given to vm_page_release or vm_page_insert.
1479 */
1480 void
1481 vm_page_init(
1482 vm_page_t mem,
1483 ppnum_t phys_page)
1484 {
1485 assert(phys_page);
1486 *mem = vm_page_template;
1487 mem->phys_page = phys_page;
1488 }
1489
1490 /*
1491 * vm_page_grab_fictitious:
1492 *
1493 * Remove a fictitious page from the free list.
1494 * Returns VM_PAGE_NULL if there are no free pages.
1495 */
1496 int c_vm_page_grab_fictitious = 0;
1497 int c_vm_page_release_fictitious = 0;
1498 int c_vm_page_more_fictitious = 0;
1499
1500 extern vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
1501
1502 vm_page_t
1503 vm_page_grab_fictitious_common(
1504 ppnum_t phys_addr)
1505 {
1506 register vm_page_t m;
1507
1508 m = (vm_page_t)zget(vm_page_zone);
1509 if (m) {
1510 vm_page_init(m, phys_addr);
1511 m->fictitious = TRUE;
1512 }
1513
1514 c_vm_page_grab_fictitious++;
1515 return m;
1516 }
1517
1518 vm_page_t
1519 vm_page_grab_fictitious(void)
1520 {
1521 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1522 }
1523
1524 vm_page_t
1525 vm_page_grab_guard(void)
1526 {
1527 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1528 }
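
/*
 * Caller-side sketch: the fictitious-page zone can be temporarily empty, so
 * callers loop, refilling it with vm_page_more_fictitious() (which may
 * block), just as vm_page_create() above does.
 * example_grab_fictitious_blocking() is a hypothetical helper name.
 */
#if 0	/* illustrative example only */
static vm_page_t
example_grab_fictitious_blocking(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
		vm_page_more_fictitious();
	return (m);
}
#endif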
1529
1530 /*
1531 * vm_page_release_fictitious:
1532 *
1533 * Release a fictitious page to the free list.
1534 */
1535
1536 void
1537 vm_page_release_fictitious(
1538 register vm_page_t m)
1539 {
1540 assert(!m->free);
1541 assert(m->busy);
1542 assert(m->fictitious);
1543 assert(m->phys_page == vm_page_fictitious_addr ||
1544 m->phys_page == vm_page_guard_addr);
1545
1546 c_vm_page_release_fictitious++;
1547 #if DEBUG
1548 if (m->free)
1549 panic("vm_page_release_fictitious");
1550 #endif
1551 m->free = TRUE;
1552 zfree(vm_page_zone, m);
1553 }
1554
1555 /*
1556 * vm_page_more_fictitious:
1557 *
1558 * Add more fictitious pages to the free list.
1559 * Allowed to block. This routine is way intimate
1560 * with the zones code, for several reasons:
1561 * 1. we need to carve some page structures out of physical
1562 * memory before zones work, so they _cannot_ come from
1563 * the zone_map.
1564 * 2. the zone needs to be collectable in order to prevent
1565 * growth without bound. These structures are used by
1566 * the device pager (by the hundreds and thousands), as
1567 * private pages for pageout, and as blocking pages for
1568 * pagein. Temporary bursts in demand should not result in
1569 * permanent allocation of a resource.
1570 * 3. To smooth allocation humps, we allocate single pages
1571 * with kernel_memory_allocate(), and cram them into the
1572 * zone. This also allows us to initialize the vm_page_t's
1573 * on the way into the zone, so that zget() always returns
1574 * an initialized structure. The zone free element pointer
1575 * and the free page pointer are both the first item in the
1576 * vm_page_t.
1577 * 4. By having the pages in the zone pre-initialized, we need
1578 * not keep 2 levels of lists. The garbage collector simply
1579 * scans our list, and reduces physical memory usage as it
1580 * sees fit.
1581 */
1582
1583 void vm_page_more_fictitious(void)
1584 {
1585 register vm_page_t m;
1586 vm_offset_t addr;
1587 kern_return_t retval;
1588 int i;
1589
1590 c_vm_page_more_fictitious++;
1591
1592 /*
1593 * Allocate a single page from the zone_map. Do not wait if no physical
1594 * pages are immediately available, and do not zero the space. We need
1595 * our own blocking lock here to prevent having multiple,
1596 * simultaneous requests from piling up on the zone_map lock. Exactly
1597 * one (of our) threads should be potentially waiting on the map lock.
1598 * If the winner is not vm-privileged, then the page allocation will fail,
1599 * and it will temporarily block here in the vm_page_wait().
1600 */
1601 lck_mtx_lock(&vm_page_alloc_lock);
1602 /*
1603 * If another thread allocated space, just bail out now.
1604 */
1605 if (zone_free_count(vm_page_zone) > 5) {
1606 /*
1607 * The number "5" is a small number that is larger than the
1608 * number of fictitious pages that any single caller will
1609 * attempt to allocate. Otherwise, a thread will attempt to
1610 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1611 * release all of the resources and locks already acquired,
1612 * and then call this routine. This routine finds the pages
1613 * that the caller released, so fails to allocate new space.
1614 * The process repeats infinitely. The largest known number
1615 * of fictitious pages required in this manner is 2. 5 is
1616 * simply a somewhat larger number.
1617 */
1618 lck_mtx_unlock(&vm_page_alloc_lock);
1619 return;
1620 }
1621
1622 retval = kernel_memory_allocate(zone_map,
1623 &addr, PAGE_SIZE, VM_PROT_ALL,
1624 KMA_KOBJECT|KMA_NOPAGEWAIT);
1625 if (retval != KERN_SUCCESS) {
1626 /*
1627 * No page was available. Tell the pageout daemon, drop the
1628 * lock to give another thread a chance at it, and
1629 * wait for the pageout daemon to make progress.
1630 */
1631 lck_mtx_unlock(&vm_page_alloc_lock);
1632 vm_page_wait(THREAD_UNINT);
1633 return;
1634 }
1635 /*
1636 * Initialize as many vm_page_t's as will fit on this page. This
1637 * depends on the zone code disturbing ONLY the first item of
1638 * each zone element.
1639 */
1640 m = (vm_page_t)addr;
1641 for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
1642 vm_page_init(m, vm_page_fictitious_addr);
1643 m->fictitious = TRUE;
1644 m++;
1645 }
1646 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1647 lck_mtx_unlock(&vm_page_alloc_lock);
1648 }
1649
1650
1651 /*
1652 * vm_pool_low():
1653 *
1654 * Return true if it is not likely that a non-vm_privileged thread
1655 * can get memory without blocking. Advisory only, since the
1656 * situation may change under us.
1657 */
1658 int
1659 vm_pool_low(void)
1660 {
1661 /* No locking, at worst we will fib. */
1662 return( vm_page_free_count <= vm_page_free_reserved );
1663 }
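
/*
 * Caller-side sketch: since vm_pool_low() is advisory and takes no locks, a
 * caller that must not block can use it as a cheap early-out before trying
 * to allocate.  example_try_grab() is a hypothetical helper name.
 */
#if 0	/* illustrative example only */
static vm_page_t
example_try_grab(void)
{
	if (vm_pool_low())
		return (VM_PAGE_NULL);	/* likely to fail or block; let the caller back off */

	return (vm_page_grab());
}
#endif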
1664
1665
1666
1667 /*
1668 * this is an interface to support bring-up of drivers
1669 * on platforms with physical memory > 4G...
1670 */
1671 int vm_himemory_mode = 0;
1672
1673
1674 /*
1675 * this interface exists to support hardware controllers
1676 * incapable of generating DMAs with more than 32 bits
1677 * of address on platforms with physical memory > 4G...
1678 */
1679 unsigned int vm_lopage_free_count = 0;
1680 unsigned int vm_lopage_max_count = 0;
1681 queue_head_t vm_lopage_queue_free;
1682
1683 vm_page_t
1684 vm_page_grablo(void)
1685 {
1686 register vm_page_t mem;
1687 unsigned int vm_lopage_alloc_count;
1688
1689 if (vm_lopage_poolsize == 0)
1690 return (vm_page_grab());
1691
1692 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1693
1694 if (! queue_empty(&vm_lopage_queue_free)) {
1695 queue_remove_first(&vm_lopage_queue_free,
1696 mem,
1697 vm_page_t,
1698 pageq);
1699 assert(mem->free);
1700 assert(mem->busy);
1701 assert(!mem->pmapped);
1702 assert(!mem->wpmapped);
1703
1704 mem->pageq.next = NULL;
1705 mem->pageq.prev = NULL;
1706 mem->free = FALSE;
1707
1708 vm_lopage_free_count--;
1709 vm_lopage_alloc_count = (vm_lopage_poolend - vm_lopage_poolstart) - vm_lopage_free_count;
1710 if (vm_lopage_alloc_count > vm_lopage_max_count)
1711 vm_lopage_max_count = vm_lopage_alloc_count;
1712 } else {
1713 mem = VM_PAGE_NULL;
1714 }
1715 lck_mtx_unlock(&vm_page_queue_free_lock);
1716
1717 return (mem);
1718 }
1719
1720
1721 /*
1722 * vm_page_grab:
1723 *
1724 * first try to grab a page from the per-cpu free list...
1725 * this must be done while pre-emption is disabled... if
1726 * a page is available, we're done...
1727 * if no page is available, grab the vm_page_queue_free_lock
1728 * and see if current number of free pages would allow us
1729 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1730 * if there are pages available, disable preemption and
1731 * recheck the state of the per-cpu free list... we could
1732 * have been preempted and moved to a different cpu, or
1733 * some other thread could have re-filled it... if still
1734 * empty, figure out how many pages we can steal from the
1735 * global free queue and move to the per-cpu queue...
1736 * return one of these pages when done... only wake up the
1737 * pageout_scan thread if we moved pages from the global
1738 * list... no need for the wakeup if we've satisfied the
1739 * request from the per-cpu queue.
1740 */
1741
1742 #define COLOR_GROUPS_TO_STEAL 4
1743
1744
1745 vm_page_t
1746 vm_page_grab( void )
1747 {
1748 vm_page_t mem;
1749
1750
1751 disable_preemption();
1752
1753 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1754 return_page_from_cpu_list:
1755 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1756 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1757 mem->pageq.next = NULL;
1758
1759 enable_preemption();
1760
1761 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1762 assert(mem->tabled == FALSE);
1763 assert(mem->object == VM_OBJECT_NULL);
1764 assert(!mem->laundry);
1765 assert(!mem->free);
1766 assert(pmap_verify_free(mem->phys_page));
1767 assert(mem->busy);
1768 assert(!mem->encrypted);
1769 assert(!mem->pmapped);
1770 assert(!mem->wpmapped);
1771
1772 return mem;
1773 }
1774 enable_preemption();
1775
1776
1777 /*
1778 * Optionally produce warnings if the wire or gobble
1779 * counts exceed some threshold.
1780 */
1781 if (vm_page_wire_count_warning > 0
1782 && vm_page_wire_count >= vm_page_wire_count_warning) {
1783 printf("mk: vm_page_grab(): high wired page count of %d\n",
1784 vm_page_wire_count);
1785 assert(vm_page_wire_count < vm_page_wire_count_warning);
1786 }
1787 if (vm_page_gobble_count_warning > 0
1788 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1789 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1790 vm_page_gobble_count);
1791 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1792 }
1793
1794 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1795
1796 /*
1797 * Only let privileged threads (involved in pageout)
1798 * dip into the reserved pool.
1799 */
1800 if ((vm_page_free_count < vm_page_free_reserved) &&
1801 !(current_thread()->options & TH_OPT_VMPRIV)) {
1802 lck_mtx_unlock(&vm_page_queue_free_lock);
1803 mem = VM_PAGE_NULL;
1804 }
1805 else {
1806 vm_page_t head;
1807 vm_page_t tail;
1808 unsigned int pages_to_steal;
1809 unsigned int color;
1810
1811 while ( vm_page_free_count == 0 ) {
1812
1813 lck_mtx_unlock(&vm_page_queue_free_lock);
1814 /*
1815 * must be a privileged thread to be
1816 * in this state since a non-privileged
1817 * thread would have bailed if we were
1818 * under the vm_page_free_reserved mark
1819 */
1820 VM_PAGE_WAIT();
1821 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1822 }
1823
1824 disable_preemption();
1825
1826 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1827 lck_mtx_unlock(&vm_page_queue_free_lock);
1828
1829 /*
1830 * we got preempted and moved to another processor
1831 * or we got preempted and someone else ran and filled the cache
1832 */
1833 goto return_page_from_cpu_list;
1834 }
1835 if (vm_page_free_count <= vm_page_free_reserved)
1836 pages_to_steal = 1;
1837 else {
1838 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1839
1840 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1841 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1842 }
1843 color = PROCESSOR_DATA(current_processor(), start_color);
1844 head = tail = NULL;
1845
1846 while (pages_to_steal--) {
1847 if (--vm_page_free_count < vm_page_free_count_minimum)
1848 vm_page_free_count_minimum = vm_page_free_count;
1849
1850 while (queue_empty(&vm_page_queue_free[color]))
1851 color = (color + 1) & vm_color_mask;
1852
1853 queue_remove_first(&vm_page_queue_free[color],
1854 mem,
1855 vm_page_t,
1856 pageq);
1857 mem->pageq.next = NULL;
1858 mem->pageq.prev = NULL;
1859
1860 color = (color + 1) & vm_color_mask;
1861
1862 if (head == NULL)
1863 head = mem;
1864 else
1865 tail->pageq.next = (queue_t)mem;
1866 tail = mem;
1867
1868 mem->pageq.prev = NULL;
1869 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1870 assert(mem->tabled == FALSE);
1871 assert(mem->object == VM_OBJECT_NULL);
1872 assert(!mem->laundry);
1873 assert(mem->free);
1874 mem->free = FALSE;
1875
1876 assert(pmap_verify_free(mem->phys_page));
1877 assert(mem->busy);
1878 assert(!mem->free);
1879 assert(!mem->encrypted);
1880 assert(!mem->pmapped);
1881 assert(!mem->wpmapped);
1882 }
1883 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1884 PROCESSOR_DATA(current_processor(), start_color) = color;
1885
1886 /*
1887 * satisfy this request
1888 */
1889 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1890 mem = head;
1891 mem->pageq.next = NULL;
1892
1893 lck_mtx_unlock(&vm_page_queue_free_lock);
1894
1895 enable_preemption();
1896 }
1897 /*
1898 * Decide if we should poke the pageout daemon.
1899 * We do this if the free count is less than the low
1900 * water mark, or if the free count is less than the high
1901 * water mark (but above the low water mark) and the inactive
1902 * count is less than its target.
1903 *
1904 * We don't have the counts locked ... if they change a little,
1905 * it doesn't really matter.
1906 */
1907 if ((vm_page_free_count < vm_page_free_min) ||
1908 ((vm_page_free_count < vm_page_free_target) &&
1909 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1910 thread_wakeup((event_t) &vm_page_free_wanted);
1911
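/*
 * percent_avail below approximates the percentage of physical memory
 * that is free or reclaimable:
 *
 *   percent_avail = 100 * (active + inactive + speculative + free
 *                          [+ purgeable, when no default pager is set])
 *                   / atop_64(max_mem)
 *
 * The notification thread is only poked when the value has moved at
 * least 5 points from the last recorded kern_memorystatus_level (here,
 * downward); the sites further down check the upward direction.
 */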
1912 #if CONFIG_EMBEDDED
1913 {
1914 int percent_avail;
1915
1916 /*
1917 * Decide if we need to poke the memorystatus notification thread.
1918 */
1919 percent_avail =
1920 (vm_page_active_count + vm_page_inactive_count +
1921 vm_page_speculative_count + vm_page_free_count +
1922 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
1923 atop_64(max_mem);
1924 if (percent_avail <= (kern_memorystatus_level - 5)) {
1925 kern_memorystatus_level = percent_avail;
1926 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1927 }
1928 }
1929 #endif
1930
1931 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1932
1933 return mem;
1934 }
1935
1936 /*
1937 * vm_page_release:
1938 *
1939 * Return a page to the free list.
1940 */
1941
1942 void
1943 vm_page_release(
1944 register vm_page_t mem)
1945 {
1946 unsigned int color;
1947 int need_wakeup = 0;
1948 int need_priv_wakeup = 0;
1949 #if 0
1950 unsigned int pindex;
1951 phys_entry *physent;
1952
1953 physent = mapping_phys_lookup(mem->phys_page, &pindex); /* (BRINGUP) */
1954 if(physent->ppLink & ppN) { /* (BRINGUP) */
1955 panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
1956 }
1957 physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */
1958 #endif
1959 assert(!mem->private && !mem->fictitious);
1960 if (vm_page_free_verify) {
1961 assert(pmap_verify_free(mem->phys_page));
1962 }
1963 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1964
1965
1966 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1967 #if DEBUG
1968 if (mem->free)
1969 panic("vm_page_release");
1970 #endif
1971 mem->free = TRUE;
1972
1973 assert(mem->busy);
1974 assert(!mem->laundry);
1975 assert(mem->object == VM_OBJECT_NULL);
1976 assert(mem->pageq.next == NULL &&
1977 mem->pageq.prev == NULL);
1978 assert(mem->listq.next == NULL &&
1979 mem->listq.prev == NULL);
1980
1981 if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
1982 /*
1983 * this exists to support hardware controllers
1984 * incapable of generating DMAs with more than 32 bits
1985 * of address on platforms with physical memory > 4G...
1986 */
1987 queue_enter_first(&vm_lopage_queue_free,
1988 mem,
1989 vm_page_t,
1990 pageq);
1991 vm_lopage_free_count++;
1992 } else {
1993 color = mem->phys_page & vm_color_mask;
1994 queue_enter_first(&vm_page_queue_free[color],
1995 mem,
1996 vm_page_t,
1997 pageq);
1998 vm_page_free_count++;
1999 /*
2000 * Check if we should wake up someone waiting for a page.
2001 * But don't bother waking them unless they can allocate.
2002 *
2003 * We wake up only one thread, to prevent starvation.
2004 * Because the scheduling system handles wait queues FIFO,
2005 * if we wake up all waiting threads, one greedy thread
2006 * can starve multiple niceguy threads. When the threads
2007 * all wake up, the greedy thread runs first, grabs the page,
2008 * and waits for another page. It will be the first to run
2009 * when the next page is freed.
2010 *
2011 * However, there is a slight danger here.
2012 * The thread we wake might not use the free page.
2013 * Then the other threads could wait indefinitely
2014 * while the page goes unused. To forestall this,
2015 * the pageout daemon will keep making free pages
2016 * as long as vm_page_free_wanted is non-zero.
2017 */
2018
2019 assert(vm_page_free_count > 0);
2020 if (vm_page_free_wanted_privileged > 0) {
2021 vm_page_free_wanted_privileged--;
2022 need_priv_wakeup = 1;
2023 } else if (vm_page_free_wanted > 0 &&
2024 vm_page_free_count > vm_page_free_reserved) {
2025 vm_page_free_wanted--;
2026 need_wakeup = 1;
2027 }
2028 }
2029 lck_mtx_unlock(&vm_page_queue_free_lock);
2030
2031 if (need_priv_wakeup)
2032 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2033 else if (need_wakeup)
2034 thread_wakeup_one((event_t) &vm_page_free_count);
2035
2036 #if CONFIG_EMBEDDED
2037 {
2038 int percent_avail;
2039
2040 /*
2041 * Decide if we need to poke the memorystatus notification thread.
2042 * Locking is not a big issue, as only a single thread delivers these.
2043 */
2044 percent_avail =
2045 (vm_page_active_count + vm_page_inactive_count +
2046 vm_page_speculative_count + vm_page_free_count +
2047 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2048 atop_64(max_mem);
2049 if (percent_avail >= (kern_memorystatus_level + 5)) {
2050 kern_memorystatus_level = percent_avail;
2051 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2052 }
2053 }
2054 #endif
2055 }
2056
2057 /*
2058 * vm_page_wait:
2059 *
2060 * Wait for a page to become available.
2061 * If there are plenty of free pages, then we don't sleep.
2062 *
2063 * Returns:
2064 * TRUE: There may be another page, try again
2065 * FALSE: We were interrupted out of our wait, don't try again
2066 */
2067
2068 boolean_t
2069 vm_page_wait(
2070 int interruptible )
2071 {
2072 /*
2073 * We can't use vm_page_free_reserved to make this
2074 * determination. Consider: some thread might
2075 * need to allocate two pages. The first allocation
2076 * succeeds, the second fails. After the first page is freed,
2077 * a call to vm_page_wait must really block.
2078 */
2079 kern_return_t wait_result;
2080 int need_wakeup = 0;
2081 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2082
2083 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2084
2085 if (is_privileged && vm_page_free_count) {
2086 lck_mtx_unlock(&vm_page_queue_free_lock);
2087 return TRUE;
2088 }
2089 if (vm_page_free_count < vm_page_free_target) {
2090
2091 if (is_privileged) {
2092 if (vm_page_free_wanted_privileged++ == 0)
2093 need_wakeup = 1;
2094 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2095 } else {
2096 if (vm_page_free_wanted++ == 0)
2097 need_wakeup = 1;
2098 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2099 }
2100 lck_mtx_unlock(&vm_page_queue_free_lock);
2101 counter(c_vm_page_wait_block++);
2102
2103 if (need_wakeup)
2104 thread_wakeup((event_t)&vm_page_free_wanted);
2105
2106 if (wait_result == THREAD_WAITING)
2107 wait_result = thread_block(THREAD_CONTINUE_NULL);
2108
2109 return(wait_result == THREAD_AWAKENED);
2110 } else {
2111 lck_mtx_unlock(&vm_page_queue_free_lock);
2112 return TRUE;
2113 }
2114 }
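/*
 * Illustrative caller pattern (a sketch, not code from this file): a
 * caller that must have a page typically loops over vm_page_grab() and
 * vm_page_wait(), much as vm_page_part_zero_fill() does further down;
 * 'interruptible' here stands for whatever wait mode the caller wants:
 *
 *	for (;;) {
 *		mem = vm_page_grab();
 *		if (mem != VM_PAGE_NULL)
 *			break;
 *		if (!vm_page_wait(interruptible))
 *			break;		/* interrupted out of the wait */
 *	}
 */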
2115
2116 /*
2117 * vm_page_alloc:
2118 *
2119 * Allocate and return a memory cell associated
2120 * with this VM object/offset pair.
2121 *
2122 * Object must be locked.
2123 */
2124
2125 vm_page_t
2126 vm_page_alloc(
2127 vm_object_t object,
2128 vm_object_offset_t offset)
2129 {
2130 register vm_page_t mem;
2131
2132 vm_object_lock_assert_exclusive(object);
2133 mem = vm_page_grab();
2134 if (mem == VM_PAGE_NULL)
2135 return VM_PAGE_NULL;
2136
2137 vm_page_insert(mem, object, offset);
2138
2139 return(mem);
2140 }
2141
2142 vm_page_t
2143 vm_page_alloclo(
2144 vm_object_t object,
2145 vm_object_offset_t offset)
2146 {
2147 register vm_page_t mem;
2148
2149 vm_object_lock_assert_exclusive(object);
2150 mem = vm_page_grablo();
2151 if (mem == VM_PAGE_NULL)
2152 return VM_PAGE_NULL;
2153
2154 vm_page_insert(mem, object, offset);
2155
2156 return(mem);
2157 }
2158
2159
2160 /*
2161 * vm_page_alloc_guard:
2162 *
2163 * Allocate a fictitious page which will be used
2164 * as a guard page. The page will be inserted into
2165 * the object and returned to the caller.
2166 */
2167
2168 vm_page_t
2169 vm_page_alloc_guard(
2170 vm_object_t object,
2171 vm_object_offset_t offset)
2172 {
2173 register vm_page_t mem;
2174
2175 vm_object_lock_assert_exclusive(object);
2176 mem = vm_page_grab_guard();
2177 if (mem == VM_PAGE_NULL)
2178 return VM_PAGE_NULL;
2179
2180 vm_page_insert(mem, object, offset);
2181
2182 return(mem);
2183 }
2184
2185
2186 counter(unsigned int c_laundry_pages_freed = 0;)
2187
2188 /*
2189 * vm_page_free:
2190 *
2191 * Returns the given page to the free list,
2192 * disassociating it from any VM object.
2193 *
2194 * Object and page queues must be locked prior to entry.
2195 */
2196 static void
2197 vm_page_free_prepare(
2198 register vm_page_t mem)
2199 {
2200 vm_page_free_prepare_queues(mem);
2201 vm_page_free_prepare_object(mem, TRUE);
2202 }
2203
2204
2205 void
2206 vm_page_free_prepare_queues(
2207 vm_page_t mem)
2208 {
2209 VM_PAGE_CHECK(mem);
2210 assert(!mem->free);
2211 assert(!mem->cleaning);
2212 assert(!mem->pageout);
2213 #if DEBUG
2214 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2215 if (mem->free)
2216 panic("vm_page_free: freeing page on free list\n");
2217 #endif
2218 if (mem->object) {
2219 vm_object_lock_assert_exclusive(mem->object);
2220 }
2221
2222 if (mem->laundry) {
2223 /*
2224 * We may have to free a page while it's being laundered
2225 * if we lost its pager (due to a forced unmount, for example).
2226 * We need to call vm_pageout_throttle_up() before removing
2227 * the page from its VM object, so that we can find out on
2228 * which pageout queue the page is on.
2229 */
2230 vm_pageout_throttle_up(mem);
2231 counter(++c_laundry_pages_freed);
2232 }
2233 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2234
2235 if (VM_PAGE_WIRED(mem)) {
2236 if (mem->object) {
2237 assert(mem->object->wired_page_count > 0);
2238 mem->object->wired_page_count--;
2239 assert(mem->object->resident_page_count >=
2240 mem->object->wired_page_count);
2241 }
2242 if (!mem->private && !mem->fictitious)
2243 vm_page_wire_count--;
2244 mem->wire_count = 0;
2245 assert(!mem->gobbled);
2246 } else if (mem->gobbled) {
2247 if (!mem->private && !mem->fictitious)
2248 vm_page_wire_count--;
2249 vm_page_gobble_count--;
2250 }
2251 }
2252
2253
2254 void
2255 vm_page_free_prepare_object(
2256 vm_page_t mem,
2257 boolean_t remove_from_hash)
2258 {
2259 if (mem->object) {
2260 vm_object_lock_assert_exclusive(mem->object);
2261 }
2262
2263 if (mem->tabled)
2264 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2265
2266 PAGE_WAKEUP(mem); /* clears wanted */
2267
2268 if (mem->private) {
2269 mem->private = FALSE;
2270 mem->fictitious = TRUE;
2271 mem->phys_page = vm_page_fictitious_addr;
2272 }
2273 if (mem->fictitious) {
2274 /* Some of these may be unnecessary */
2275 mem->gobbled = FALSE;
2276 mem->busy = TRUE;
2277 mem->absent = FALSE;
2278 mem->error = FALSE;
2279 mem->dirty = FALSE;
2280 mem->precious = FALSE;
2281 mem->reference = FALSE;
2282 mem->encrypted = FALSE;
2283 mem->encrypted_cleaning = FALSE;
2284 mem->pmapped = FALSE;
2285 mem->wpmapped = FALSE;
2286 mem->reusable = FALSE;
2287 } else {
2288 if (mem->zero_fill == TRUE)
2289 VM_ZF_COUNT_DECR();
2290 vm_page_init(mem, mem->phys_page);
2291 }
2292 }
2293
2294
2295 void
2296 vm_page_free(
2297 vm_page_t mem)
2298 {
2299 vm_page_free_prepare(mem);
2300 if (mem->fictitious) {
2301 vm_page_release_fictitious(mem);
2302 } else {
2303 vm_page_release(mem);
2304 }
2305 }
2306
2307
2308 void
2309 vm_page_free_unlocked(
2310 vm_page_t mem,
2311 boolean_t remove_from_hash)
2312 {
2313 vm_page_lockspin_queues();
2314 vm_page_free_prepare_queues(mem);
2315 vm_page_unlock_queues();
2316
2317 vm_page_free_prepare_object(mem, remove_from_hash);
2318
2319 if (mem->fictitious) {
2320 vm_page_release_fictitious(mem);
2321 } else {
2322 vm_page_release(mem);
2323 }
2324 }
2325
2326 /*
2327 * Free a list of pages. The list can be up to several hundred pages,
2328 * as blocked up by vm_pageout_scan().
2329 * The big win is not having to take the free list lock once
2330 * per page. We sort the incoming pages into n lists, one for
2331 * each color.
2332 */
2333 void
2334 vm_page_free_list(
2335 vm_page_t mem,
2336 boolean_t prepare_object)
2337 {
2338 vm_page_t nxt;
2339 int pg_count = 0;
2340 int color;
2341 int inuse_list_head = -1;
2342
2343 queue_head_t free_list[MAX_COLORS];
2344 int inuse[MAX_COLORS];
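/*
 * inuse[]/inuse_list_head form a simple singly-linked list of the
 * colors that actually receive pages: inuse_list_head is the first such
 * color and inuse[color] is the next one (-1 terminates the list), so
 * the flush pass below only visits the non-empty local free lists
 * instead of scanning all MAX_COLORS of them.
 */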
2345
2346 for (color = 0; color < (signed) vm_colors; color++) {
2347 queue_init(&free_list[color]);
2348 }
2349
2350 while (mem) {
2351 assert(!mem->inactive);
2352 assert(!mem->active);
2353 assert(!mem->throttled);
2354 assert(!mem->free);
2355 assert(!mem->speculative);
2356 assert(mem->pageq.prev == NULL);
2357
2358 nxt = (vm_page_t)(mem->pageq.next);
2359
2360 if (prepare_object == TRUE)
2361 vm_page_free_prepare_object(mem, TRUE);
2362
2363 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2364 assert(pmap_verify_free(mem->phys_page));
2365 }
2366 assert(mem->busy);
2367
2368 if (!mem->fictitious) {
2369 if (mem->phys_page <= vm_lopage_poolend && mem->phys_page >= vm_lopage_poolstart) {
2370 mem->pageq.next = NULL;
2371 vm_page_release(mem);
2372 } else {
2373
2374 /*
2375 * IMPORTANT: we can't set the page "free" here
2376 * because that would make the page eligible for
2377 * a physically-contiguous allocation (see
2378 * vm_page_find_contiguous()) right away (we don't
2379 * hold the vm_page_queue_free lock). That would
2380 * cause trouble because the page is not actually
2381 * in the free queue yet...
2382 */
2383 color = mem->phys_page & vm_color_mask;
2384 if (queue_empty(&free_list[color])) {
2385 inuse[color] = inuse_list_head;
2386 inuse_list_head = color;
2387 }
2388 queue_enter_first(&free_list[color],
2389 mem,
2390 vm_page_t,
2391 pageq);
2392 pg_count++;
2393 }
2394 } else {
2395 assert(mem->phys_page == vm_page_fictitious_addr ||
2396 mem->phys_page == vm_page_guard_addr);
2397 vm_page_release_fictitious(mem);
2398 }
2399 mem = nxt;
2400 }
2401 if (pg_count) {
2402 unsigned int avail_free_count;
2403 unsigned int need_wakeup = 0;
2404 unsigned int need_priv_wakeup = 0;
2405
2406 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2407
2408 color = inuse_list_head;
2409
2410 while( color != -1 ) {
2411 vm_page_t first, last;
2412 vm_page_t first_free;
2413
2414 /*
2415 * Now that we hold the vm_page_queue_free lock,
2416 * it's safe to mark all pages in our local queue
2417 * as "free"...
2418 */
2419 queue_iterate(&free_list[color],
2420 mem,
2421 vm_page_t,
2422 pageq) {
2423 assert(!mem->free);
2424 assert(mem->busy);
2425 mem->free = TRUE;
2426 }
2427
2428 /*
2429 * ... and insert our local queue at the head of
2430 * the global free queue.
2431 */
2432 first = (vm_page_t) queue_first(&free_list[color]);
2433 last = (vm_page_t) queue_last(&free_list[color]);
2434 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2435 if (queue_empty(&vm_page_queue_free[color])) {
2436 queue_last(&vm_page_queue_free[color]) =
2437 (queue_entry_t) last;
2438 } else {
2439 queue_prev(&first_free->pageq) =
2440 (queue_entry_t) last;
2441 }
2442 queue_first(&vm_page_queue_free[color]) =
2443 (queue_entry_t) first;
2444 queue_prev(&first->pageq) =
2445 (queue_entry_t) &vm_page_queue_free[color];
2446 queue_next(&last->pageq) =
2447 (queue_entry_t) first_free;
2448
2449 /* next color */
2450 color = inuse[color];
2451 }
2452
2453 vm_page_free_count += pg_count;
2454 avail_free_count = vm_page_free_count;
2455
2456 if (vm_page_free_wanted_privileged > 0 &&
2457 avail_free_count > 0) {
2458 if (avail_free_count < vm_page_free_wanted_privileged) {
2459 need_priv_wakeup = avail_free_count;
2460 vm_page_free_wanted_privileged -=
2461 avail_free_count;
2462 avail_free_count = 0;
2463 } else {
2464 need_priv_wakeup = vm_page_free_wanted_privileged;
2465 vm_page_free_wanted_privileged = 0;
2466 avail_free_count -=
2467 need_priv_wakeup;
2468 }
2469 }
2470
2471 if (vm_page_free_wanted > 0 &&
2472 avail_free_count > vm_page_free_reserved) {
2473 unsigned int available_pages;
2474
2475 available_pages = (avail_free_count -
2476 vm_page_free_reserved);
2477
2478 if (available_pages >= vm_page_free_wanted) {
2479 need_wakeup = vm_page_free_wanted;
2480 vm_page_free_wanted = 0;
2481 } else {
2482 need_wakeup = available_pages;
2483 vm_page_free_wanted -= available_pages;
2484 }
2485 }
2486 lck_mtx_unlock(&vm_page_queue_free_lock);
2487
2488 if (need_priv_wakeup != 0) {
2489 /*
2490 * There shouldn't be that many VM-privileged threads,
2491 * so let's wake them all up, even if we don't quite
2492 * have enough pages to satisfy them all.
2493 */
2494 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2495 }
2496 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2497 /*
2498 * We don't expect to have any more waiters
2499 * after this, so let's wake them all up at
2500 * once.
2501 */
2502 thread_wakeup((event_t) &vm_page_free_count);
2503 } else for (; need_wakeup != 0; need_wakeup--) {
2504 /*
2505 * Wake up one waiter per page we just released.
2506 */
2507 thread_wakeup_one((event_t) &vm_page_free_count);
2508 }
2509 #if CONFIG_EMBEDDED
2510 {
2511 int percent_avail;
2512
2513 /*
2514 * Decide if we need to poke the memorystatus notification thread.
2515 */
2516 percent_avail =
2517 (vm_page_active_count + vm_page_inactive_count +
2518 vm_page_speculative_count + vm_page_free_count +
2519 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2520 atop_64(max_mem);
2521 if (percent_avail >= (kern_memorystatus_level + 5)) {
2522 kern_memorystatus_level = percent_avail;
2523 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2524 }
2525 }
2526 #endif
2527 }
2528 }
2529
2530
2531 /*
2532 * vm_page_wire:
2533 *
2534 * Mark this page as wired down by yet
2535 * another map, removing it from paging queues
2536 * as necessary.
2537 *
2538 * The page's object and the page queues must be locked.
2539 */
2540 void
2541 vm_page_wire(
2542 register vm_page_t mem)
2543 {
2544
2545 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2546
2547 VM_PAGE_CHECK(mem);
2548 if (mem->object) {
2549 vm_object_lock_assert_exclusive(mem->object);
2550 } else {
2551 /*
2552 * In theory, the page should be in an object before it
2553 * gets wired, since we need to hold the object lock
2554 * to update some fields in the page structure.
2555 * However, some code (i386 pmap, for example) might want
2556 * to wire a page before it gets inserted into an object.
2557 * That's somewhat OK, as long as nobody else can get to
2558 * that page and update it at the same time.
2559 */
2560 }
2561 #if DEBUG
2562 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2563 #endif
2564 if ( !VM_PAGE_WIRED(mem)) {
2565 VM_PAGE_QUEUES_REMOVE(mem);
2566
2567 if (mem->object) {
2568 mem->object->wired_page_count++;
2569 assert(mem->object->resident_page_count >=
2570 mem->object->wired_page_count);
2571 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2572 assert(vm_page_purgeable_count > 0);
2573 OSAddAtomic(-1, &vm_page_purgeable_count);
2574 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2575 }
2576 if (mem->object->all_reusable) {
2577 /*
2578 * Wired pages are not counted as "re-usable"
2579 * in "all_reusable" VM objects, so nothing
2580 * to do here.
2581 */
2582 } else if (mem->reusable) {
2583 /*
2584 * This page is not "re-usable" when it's
2585 * wired, so adjust its state and the
2586 * accounting.
2587 */
2588 vm_object_reuse_pages(mem->object,
2589 mem->offset,
2590 mem->offset+PAGE_SIZE_64,
2591 FALSE);
2592 }
2593 }
2594 assert(!mem->reusable);
2595
2596 if (!mem->private && !mem->fictitious && !mem->gobbled)
2597 vm_page_wire_count++;
2598 if (mem->gobbled)
2599 vm_page_gobble_count--;
2600 mem->gobbled = FALSE;
2601 if (mem->zero_fill == TRUE) {
2602 mem->zero_fill = FALSE;
2603 VM_ZF_COUNT_DECR();
2604 }
2605 #if CONFIG_EMBEDDED
2606 {
2607 int percent_avail;
2608
2609 /*
2610 * Decide if we need to poke the memorystatus notification thread.
2611 */
2612 percent_avail =
2613 (vm_page_active_count + vm_page_inactive_count +
2614 vm_page_speculative_count + vm_page_free_count +
2615 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2616 atop_64(max_mem);
2617 if (percent_avail <= (kern_memorystatus_level - 5)) {
2618 kern_memorystatus_level = percent_avail;
2619 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2620 }
2621 }
2622 #endif
2623 /*
2624 * ENCRYPTED SWAP:
2625 * The page could be encrypted, but
2626 * We don't have to decrypt it here
2627 * because we don't guarantee that the
2628 * data is actually valid at this point.
2629 * The page will get decrypted in
2630 * vm_fault_wire() if needed.
2631 */
2632 }
2633 assert(!mem->gobbled);
2634 mem->wire_count++;
2635 VM_PAGE_CHECK(mem);
2636 }
2637
2638 /*
2639 * vm_page_gobble:
2640 *
2641 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2642 *
2643 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2644 */
2645 void
2646 vm_page_gobble(
2647 register vm_page_t mem)
2648 {
2649 vm_page_lockspin_queues();
2650 VM_PAGE_CHECK(mem);
2651
2652 assert(!mem->gobbled);
2653 assert( !VM_PAGE_WIRED(mem));
2654
2655 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2656 if (!mem->private && !mem->fictitious)
2657 vm_page_wire_count++;
2658 }
2659 vm_page_gobble_count++;
2660 mem->gobbled = TRUE;
2661 vm_page_unlock_queues();
2662 }
2663
2664 /*
2665 * vm_page_unwire:
2666 *
2667 * Release one wiring of this page, potentially
2668 * enabling it to be paged again.
2669 *
2670 * The page's object and the page queues must be locked.
2671 */
2672 void
2673 vm_page_unwire(
2674 register vm_page_t mem)
2675 {
2676
2677 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2678
2679 VM_PAGE_CHECK(mem);
2680 assert(VM_PAGE_WIRED(mem));
2681 assert(mem->object != VM_OBJECT_NULL);
2682 #if DEBUG
2683 vm_object_lock_assert_exclusive(mem->object);
2684 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2685 #endif
2686 if (--mem->wire_count == 0) {
2687 assert(!mem->private && !mem->fictitious);
2688 vm_page_wire_count--;
2689 assert(mem->object->wired_page_count > 0);
2690 mem->object->wired_page_count--;
2691 assert(mem->object->resident_page_count >=
2692 mem->object->wired_page_count);
2693 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2694 OSAddAtomic(+1, &vm_page_purgeable_count);
2695 assert(vm_page_purgeable_wired_count > 0);
2696 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2697 }
2698 assert(!mem->laundry);
2699 assert(mem->object != kernel_object);
2700 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2701 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2702 vm_page_deactivate(mem);
2703 } else {
2704 vm_page_activate(mem);
2705 }
2706 #if CONFIG_EMBEDDED
2707 {
2708 int percent_avail;
2709
2710 /*
2711 * Decide if we need to poke the memorystatus notification thread.
2712 */
2713 percent_avail =
2714 (vm_page_active_count + vm_page_inactive_count +
2715 vm_page_speculative_count + vm_page_free_count +
2716 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2717 atop_64(max_mem);
2718 if (percent_avail >= (kern_memorystatus_level + 5)) {
2719 kern_memorystatus_level = percent_avail;
2720 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2721 }
2722 }
2723 #endif
2724 }
2725 VM_PAGE_CHECK(mem);
2726 }
2727
2728 /*
2729 * vm_page_deactivate:
2730 *
2731 * Returns the given page to the inactive list,
2732 * indicating that no physical maps have access
2733 * to this page. [Used by the physical mapping system.]
2734 *
2735 * The page queues must be locked.
2736 */
2737 void
2738 vm_page_deactivate(
2739 vm_page_t m)
2740 {
2741 vm_page_deactivate_internal(m, TRUE);
2742 }
2743
2744
2745 void
2746 vm_page_deactivate_internal(
2747 vm_page_t m,
2748 boolean_t clear_hw_reference)
2749 {
2750
2751 VM_PAGE_CHECK(m);
2752 assert(m->object != kernel_object);
2753 assert(m->phys_page != vm_page_guard_addr);
2754
2755 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2756 #if DEBUG
2757 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2758 #endif
2759 /*
2760 * This page is no longer very interesting. If it was
2761 * interesting (active or inactive/referenced), then we
2762 * clear the reference bit and (re)enter it in the
2763 * inactive queue. Note wired pages should not have
2764 * their reference bit cleared.
2765 */
2766 if (m->gobbled) { /* can this happen? */
2767 assert( !VM_PAGE_WIRED(m));
2768
2769 if (!m->private && !m->fictitious)
2770 vm_page_wire_count--;
2771 vm_page_gobble_count--;
2772 m->gobbled = FALSE;
2773 }
2774 if (m->private || (VM_PAGE_WIRED(m)))
2775 return;
2776
2777 if (!m->fictitious && !m->absent && clear_hw_reference == TRUE)
2778 pmap_clear_reference(m->phys_page);
2779
2780 m->reference = FALSE;
2781 m->no_cache = FALSE;
2782
2783 if (!m->inactive) {
2784 VM_PAGE_QUEUES_REMOVE(m);
2785
2786 assert(!m->laundry);
2787 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2788
2789 if (!IP_VALID(memory_manager_default) &&
2790 m->dirty && m->object->internal &&
2791 (m->object->purgable == VM_PURGABLE_DENY ||
2792 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2793 m->object->purgable == VM_PURGABLE_VOLATILE )) {
2794 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2795 m->throttled = TRUE;
2796 vm_page_throttled_count++;
2797 } else {
2798 if (!m->fictitious && m->object->named && m->object->ref_count == 1) {
2799 vm_page_speculate(m, FALSE);
2800 #if DEVELOPMENT || DEBUG
2801 vm_page_speculative_recreated++;
2802 #endif
2803 return;
2804 } else {
2805 if (m->zero_fill) {
2806 queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
2807 vm_zf_queue_count++;
2808 } else {
2809 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2810 }
2811 }
2812 m->inactive = TRUE;
2813 if (!m->fictitious) {
2814 vm_page_inactive_count++;
2815 token_new_pagecount++;
2816 }
2817 }
2818 }
2819 }
2820
2821 /*
2822 * vm_page_activate:
2823 *
2824 * Put the specified page on the active list (if appropriate).
2825 *
2826 * The page queues must be locked.
2827 */
2828
2829 void
2830 vm_page_activate(
2831 register vm_page_t m)
2832 {
2833 VM_PAGE_CHECK(m);
2834 #ifdef FIXME_4778297
2835 assert(m->object != kernel_object);
2836 #endif
2837 assert(m->phys_page != vm_page_guard_addr);
2838 #if DEBUG
2839 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2840 #endif
2841 if (m->gobbled) {
2842 assert( !VM_PAGE_WIRED(m));
2843 if (!m->private && !m->fictitious)
2844 vm_page_wire_count--;
2845 vm_page_gobble_count--;
2846 m->gobbled = FALSE;
2847 }
2848 if (m->private)
2849 return;
2850
2851 #if DEBUG
2852 if (m->active)
2853 panic("vm_page_activate: already active");
2854 #endif
2855
2856 if (m->speculative) {
2857 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2858 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2859 }
2860
2861 VM_PAGE_QUEUES_REMOVE(m);
2862
2863 if ( !VM_PAGE_WIRED(m)) {
2864 assert(!m->laundry);
2865 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2866 if (!IP_VALID(memory_manager_default) &&
2867 !m->fictitious && m->dirty && m->object->internal &&
2868 (m->object->purgable == VM_PURGABLE_DENY ||
2869 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2870 m->object->purgable == VM_PURGABLE_VOLATILE )) {
2871 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2872 m->throttled = TRUE;
2873 vm_page_throttled_count++;
2874 } else {
2875 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2876 m->active = TRUE;
2877 if (!m->fictitious)
2878 vm_page_active_count++;
2879 }
2880 m->reference = TRUE;
2881 m->no_cache = FALSE;
2882 }
2883 VM_PAGE_CHECK(m);
2884 }
2885
2886
2887 /*
2888 * vm_page_speculate:
2889 *
2890 * Put the specified page on the speculative list (if appropriate).
2891 *
2892 * The page queues must be locked.
2893 */
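/*
 * Speculative pages are binned by age: each bin in
 * vm_page_queue_speculative[] collects the pages queued during one
 * VM_PAGE_SPECULATIVE_Q_AGE_MS window, with age_ts recording when that
 * window closes.  When the current window expires we advance
 * speculative_age_index, and if the bin about to be reused still holds
 * pages, vm_page_speculate_ageit() folds them into the AGED queue that
 * vm_pageout_scan reclaims from.
 */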
2894 void
2895 vm_page_speculate(
2896 vm_page_t m,
2897 boolean_t new)
2898 {
2899 struct vm_speculative_age_q *aq;
2900
2901 VM_PAGE_CHECK(m);
2902 assert(m->object != kernel_object);
2903 assert(m->phys_page != vm_page_guard_addr);
2904 #if DEBUG
2905 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2906 #endif
2907
2908 VM_PAGE_QUEUES_REMOVE(m);
2909
2910 if ( !VM_PAGE_WIRED(m)) {
2911 mach_timespec_t ts;
2912 clock_sec_t sec;
2913 clock_nsec_t nsec;
2914
2915 clock_get_system_nanotime(&sec, &nsec);
2916 ts.tv_sec = (unsigned int) sec;
2917 ts.tv_nsec = nsec;
2918
2919 if (vm_page_speculative_count == 0) {
2920
2921 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2922 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2923
2924 aq = &vm_page_queue_speculative[speculative_age_index];
2925
2926 /*
2927 * set the timer to begin a new group
2928 */
2929 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2930 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2931
2932 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2933 } else {
2934 aq = &vm_page_queue_speculative[speculative_age_index];
2935
2936 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2937
2938 speculative_age_index++;
2939
2940 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2941 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2942 if (speculative_age_index == speculative_steal_index) {
2943 speculative_steal_index = speculative_age_index + 1;
2944
2945 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2946 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2947 }
2948 aq = &vm_page_queue_speculative[speculative_age_index];
2949
2950 if (!queue_empty(&aq->age_q))
2951 vm_page_speculate_ageit(aq);
2952
2953 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2954 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2955
2956 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2957 }
2958 }
2959 enqueue_tail(&aq->age_q, &m->pageq);
2960 m->speculative = TRUE;
2961 vm_page_speculative_count++;
2962
2963 if (new == TRUE) {
2964 m->object->pages_created++;
2965 #if DEVELOPMENT || DEBUG
2966 vm_page_speculative_created++;
2967 #endif
2968 }
2969 }
2970 VM_PAGE_CHECK(m);
2971 }
2972
2973
2974 /*
2975 * move pages from the specified aging bin to
2976 * the speculative bin that pageout_scan claims from
2977 *
2978 * The page queues must be locked.
2979 */
2980 void
2981 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2982 {
2983 struct vm_speculative_age_q *sq;
2984 vm_page_t t;
2985
2986 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2987
2988 if (queue_empty(&sq->age_q)) {
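/*
 * The AGED queue is empty: adopt aq's entire chain as sq's
 * contents and point the first and last pages back at sq's
 * queue head.
 */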
2989 sq->age_q.next = aq->age_q.next;
2990 sq->age_q.prev = aq->age_q.prev;
2991
2992 t = (vm_page_t)sq->age_q.next;
2993 t->pageq.prev = &sq->age_q;
2994
2995 t = (vm_page_t)sq->age_q.prev;
2996 t->pageq.next = &sq->age_q;
2997 } else {
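/*
 * The AGED queue already has pages: splice aq's chain onto the
 * tail of sq and fix up the adjoining next/prev pointers.
 */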
2998 t = (vm_page_t)sq->age_q.prev;
2999 t->pageq.next = aq->age_q.next;
3000
3001 t = (vm_page_t)aq->age_q.next;
3002 t->pageq.prev = sq->age_q.prev;
3003
3004 t = (vm_page_t)aq->age_q.prev;
3005 t->pageq.next = &sq->age_q;
3006
3007 sq->age_q.prev = aq->age_q.prev;
3008 }
3009 queue_init(&aq->age_q);
3010 }
3011
3012
3013 void
3014 vm_page_lru(
3015 vm_page_t m)
3016 {
3017 VM_PAGE_CHECK(m);
3018 assert(m->object != kernel_object);
3019 assert(m->phys_page != vm_page_guard_addr);
3020
3021 #if DEBUG
3022 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3023 #endif
3024 if (m->active || m->reference)
3025 return;
3026
3027 if (m->private || (VM_PAGE_WIRED(m)))
3028 return;
3029
3030 m->no_cache = FALSE;
3031
3032 VM_PAGE_QUEUES_REMOVE(m);
3033
3034 assert(!m->laundry);
3035 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
3036
3037 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
3038 m->inactive = TRUE;
3039
3040 vm_page_inactive_count++;
3041 token_new_pagecount++;
3042 }
3043
3044
3045 void
3046 vm_page_reactivate_all_throttled(void)
3047 {
3048 vm_page_t first_throttled, last_throttled;
3049 vm_page_t first_active;
3050 vm_page_t m;
3051 int extra_active_count;
3052
3053 extra_active_count = 0;
3054 vm_page_lock_queues();
3055 if (! queue_empty(&vm_page_queue_throttled)) {
3056 /*
3057 * Switch "throttled" pages to "active".
3058 */
3059 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3060 VM_PAGE_CHECK(m);
3061 assert(m->throttled);
3062 assert(!m->active);
3063 assert(!m->inactive);
3064 assert(!m->speculative);
3065 assert(!VM_PAGE_WIRED(m));
3066 if (!m->fictitious) {
3067 extra_active_count++;
3068 }
3069 m->throttled = FALSE;
3070 m->active = TRUE;
3071 VM_PAGE_CHECK(m);
3072 }
3073
3074 /*
3075 * Transfer the entire throttled queue to the regular LRU page queues.
3076 * We insert it at the head of the active queue, so that these pages
3077 * get re-evaluated by the LRU algorithm first, since they've been
3078 * completely out of it until now.
3079 */
3080 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3081 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3082 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3083 if (queue_empty(&vm_page_queue_active)) {
3084 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3085 } else {
3086 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3087 }
3088 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3089 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3090 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3091
3092 #if DEBUG
3093 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3094 #endif
3095 queue_init(&vm_page_queue_throttled);
3096 /*
3097 * Adjust the global page counts.
3098 */
3099 vm_page_active_count += extra_active_count;
3100 vm_page_throttled_count = 0;
3101 }
3102 assert(vm_page_throttled_count == 0);
3103 assert(queue_empty(&vm_page_queue_throttled));
3104 vm_page_unlock_queues();
3105 }
3106
3107
3108 /*
3109 * Move pages from the indicated local queue to the global active queue.
3110 * It's OK to fail if we're below the hard limit and force == FALSE;
3111 * the nolocks == TRUE case is to allow this function to be run on
3112 * the hibernate path.
3113 */
3114
3115 void
3116 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3117 {
3118 struct vpl *lq;
3119 vm_page_t first_local, last_local;
3120 vm_page_t first_active;
3121 vm_page_t m;
3122 uint32_t count = 0;
3123
3124 if (vm_page_local_q == NULL)
3125 return;
3126
3127 lq = &vm_page_local_q[lid].vpl_un.vpl;
3128
3129 if (nolocks == FALSE) {
3130 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3131 if ( !vm_page_trylockspin_queues())
3132 return;
3133 } else
3134 vm_page_lockspin_queues();
3135
3136 VPL_LOCK(&lq->vpl_lock);
3137 }
3138 if (lq->vpl_count) {
3139 /*
3140 * Switch "local" pages to "active".
3141 */
3142 assert(!queue_empty(&lq->vpl_queue));
3143
3144 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3145 VM_PAGE_CHECK(m);
3146 assert(m->local);
3147 assert(!m->active);
3148 assert(!m->inactive);
3149 assert(!m->speculative);
3150 assert(!VM_PAGE_WIRED(m));
3151 assert(!m->throttled);
3152 assert(!m->fictitious);
3153
3154 if (m->local_id != lid)
3155 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3156
3157 m->local_id = 0;
3158 m->local = FALSE;
3159 m->active = TRUE;
3160 VM_PAGE_CHECK(m);
3161
3162 count++;
3163 }
3164 if (count != lq->vpl_count)
3165 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3166
3167 /*
3168 * Transfer the entire local queue to the regular LRU page queues.
3169 */
3170 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3171 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3172 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3173
3174 if (queue_empty(&vm_page_queue_active)) {
3175 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3176 } else {
3177 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3178 }
3179 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3180 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3181 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3182
3183 queue_init(&lq->vpl_queue);
3184 /*
3185 * Adjust the global page counts.
3186 */
3187 vm_page_active_count += lq->vpl_count;
3188 lq->vpl_count = 0;
3189 }
3190 assert(queue_empty(&lq->vpl_queue));
3191
3192 if (nolocks == FALSE) {
3193 VPL_UNLOCK(&lq->vpl_lock);
3194 vm_page_unlock_queues();
3195 }
3196 }
3197
3198 /*
3199 * vm_page_part_zero_fill:
3200 *
3201 * Zero-fill a part of the page.
3202 */
3203 void
3204 vm_page_part_zero_fill(
3205 vm_page_t m,
3206 vm_offset_t m_pa,
3207 vm_size_t len)
3208 {
3209 vm_page_t tmp;
3210
3211 VM_PAGE_CHECK(m);
3212 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3213 pmap_zero_part_page(m->phys_page, m_pa, len);
3214 #else
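/*
 * No pmap support for partial zeroing: grab a scratch page, zero it,
 * copy the bytes of 'm' that lie outside [m_pa, m_pa + len) into the
 * scratch page, then copy the whole scratch page back over 'm'.
 */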
3215 while (1) {
3216 tmp = vm_page_grab();
3217 if (tmp == VM_PAGE_NULL) {
3218 vm_page_wait(THREAD_UNINT);
3219 continue;
3220 }
3221 break;
3222 }
3223 vm_page_zero_fill(tmp);
3224 if(m_pa != 0) {
3225 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3226 }
3227 if((m_pa + len) < PAGE_SIZE) {
3228 vm_page_part_copy(m, m_pa + len, tmp,
3229 m_pa + len, PAGE_SIZE - (m_pa + len));
3230 }
3231 vm_page_copy(tmp,m);
3232 VM_PAGE_FREE(tmp);
3233 #endif
3234
3235 }
3236
3237 /*
3238 * vm_page_zero_fill:
3239 *
3240 * Zero-fill the specified page.
3241 */
3242 void
3243 vm_page_zero_fill(
3244 vm_page_t m)
3245 {
3246 XPR(XPR_VM_PAGE,
3247 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3248 m->object, m->offset, m, 0,0);
3249
3250 VM_PAGE_CHECK(m);
3251
3252 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3253 pmap_zero_page(m->phys_page);
3254 }
3255
3256 /*
3257 * vm_page_part_copy:
3258 *
3259 * copy part of one page to another
3260 */
3261
3262 void
3263 vm_page_part_copy(
3264 vm_page_t src_m,
3265 vm_offset_t src_pa,
3266 vm_page_t dst_m,
3267 vm_offset_t dst_pa,
3268 vm_size_t len)
3269 {
3270 VM_PAGE_CHECK(src_m);
3271 VM_PAGE_CHECK(dst_m);
3272
3273 pmap_copy_part_page(src_m->phys_page, src_pa,
3274 dst_m->phys_page, dst_pa, len);
3275 }
3276
3277 /*
3278 * vm_page_copy:
3279 *
3280 * Copy one page to another
3281 *
3282 * ENCRYPTED SWAP:
3283 * The source page should not be encrypted. The caller should
3284 * make sure the page is decrypted first, if necessary.
3285 */
3286
3287 int vm_page_copy_cs_validations = 0;
3288 int vm_page_copy_cs_tainted = 0;
3289
3290 void
3291 vm_page_copy(
3292 vm_page_t src_m,
3293 vm_page_t dest_m)
3294 {
3295 XPR(XPR_VM_PAGE,
3296 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3297 src_m->object, src_m->offset,
3298 dest_m->object, dest_m->offset,
3299 0);
3300
3301 VM_PAGE_CHECK(src_m);
3302 VM_PAGE_CHECK(dest_m);
3303
3304 /*
3305 * ENCRYPTED SWAP:
3306 * The source page should not be encrypted at this point.
3307 * The destination page will therefore not contain encrypted
3308 * data after the copy.
3309 */
3310 if (src_m->encrypted) {
3311 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3312 }
3313 dest_m->encrypted = FALSE;
3314
3315 if (src_m->object != VM_OBJECT_NULL &&
3316 src_m->object->code_signed) {
3317 /*
3318 * We're copying a page from a code-signed object.
3319 * Whoever ends up mapping the copy page might care about
3320 * the original page's integrity, so let's validate the
3321 * source page now.
3322 */
3323 vm_page_copy_cs_validations++;
3324 vm_page_validate_cs(src_m);
3325 }
3326 /*
3327 * Propagate the cs_tainted bit to the copy page. Do not propagate
3328 * the cs_validated bit.
3329 */
3330 dest_m->cs_tainted = src_m->cs_tainted;
3331 if (dest_m->cs_tainted) {
3332 vm_page_copy_cs_tainted++;
3333 }
3334
3335 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3336 }
3337
3338 #if MACH_ASSERT
3339 static void
3340 _vm_page_print(
3341 vm_page_t p)
3342 {
3343 printf("vm_page %p: \n", p);
3344 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3345 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3346 printf(" next=%p\n", p->next);
3347 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3348 printf(" wire_count=%u\n", p->wire_count);
3349
3350 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3351 (p->local ? "" : "!"),
3352 (p->inactive ? "" : "!"),
3353 (p->active ? "" : "!"),
3354 (p->pageout_queue ? "" : "!"),
3355 (p->speculative ? "" : "!"),
3356 (p->laundry ? "" : "!"));
3357 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3358 (p->free ? "" : "!"),
3359 (p->reference ? "" : "!"),
3360 (p->gobbled ? "" : "!"),
3361 (p->private ? "" : "!"),
3362 (p->throttled ? "" : "!"));
3363 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3364 (p->busy ? "" : "!"),
3365 (p->wanted ? "" : "!"),
3366 (p->tabled ? "" : "!"),
3367 (p->fictitious ? "" : "!"),
3368 (p->pmapped ? "" : "!"),
3369 (p->wpmapped ? "" : "!"));
3370 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3371 (p->pageout ? "" : "!"),
3372 (p->absent ? "" : "!"),
3373 (p->error ? "" : "!"),
3374 (p->dirty ? "" : "!"),
3375 (p->cleaning ? "" : "!"),
3376 (p->precious ? "" : "!"),
3377 (p->clustered ? "" : "!"));
3378 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3379 (p->overwriting ? "" : "!"),
3380 (p->restart ? "" : "!"),
3381 (p->unusual ? "" : "!"),
3382 (p->encrypted ? "" : "!"),
3383 (p->encrypted_cleaning ? "" : "!"));
3384 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3385 (p->list_req_pending ? "" : "!"),
3386 (p->dump_cleaning ? "" : "!"),
3387 (p->cs_validated ? "" : "!"),
3388 (p->cs_tainted ? "" : "!"),
3389 (p->no_cache ? "" : "!"));
3390 printf(" %szero_fill\n",
3391 (p->zero_fill ? "" : "!"));
3392
3393 printf("phys_page=0x%x\n", p->phys_page);
3394 }
3395
3396 /*
3397 * Check that the list of pages is ordered by
3398 * ascending physical address and has no holes.
3399 */
3400 static int
3401 vm_page_verify_contiguous(
3402 vm_page_t pages,
3403 unsigned int npages)
3404 {
3405 register vm_page_t m;
3406 unsigned int page_count;
3407 vm_offset_t prev_addr;
3408
3409 prev_addr = pages->phys_page;
3410 page_count = 1;
3411 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3412 if (m->phys_page != prev_addr + 1) {
3413 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3414 m, (long)prev_addr, m->phys_page);
3415 printf("pages %p page_count %d\n", pages, page_count);
3416 panic("vm_page_verify_contiguous: not contiguous!");
3417 }
3418 prev_addr = m->phys_page;
3419 ++page_count;
3420 }
3421 if (page_count != npages) {
3422 printf("pages %p actual count 0x%x but requested 0x%x\n",
3423 pages, page_count, npages);
3424 panic("vm_page_verify_contiguous: count error");
3425 }
3426 return 1;
3427 }
3428
3429
3430 /*
3431 * Check the free lists for proper length etc.
3432 */
3433 static unsigned int
3434 vm_page_verify_free_list(
3435 unsigned int color,
3436 vm_page_t look_for_page,
3437 boolean_t expect_page)
3438 {
3439 unsigned int npages;
3440 vm_page_t m;
3441 vm_page_t prev_m;
3442 boolean_t found_page;
3443
3444 found_page = FALSE;
3445 npages = 0;
3446 prev_m = (vm_page_t) &vm_page_queue_free[color];
3447 queue_iterate(&vm_page_queue_free[color],
3448 m,
3449 vm_page_t,
3450 pageq) {
3451 if (m == look_for_page) {
3452 found_page = TRUE;
3453 }
3454 if ((vm_page_t) m->pageq.prev != prev_m)
3455 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3456 color, npages, m, m->pageq.prev, prev_m);
3457 if ( ! m->free )
3458 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3459 color, npages, m);
3460 if ( ! m->busy )
3461 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3462 color, npages, m);
3463 if ( (m->phys_page & vm_color_mask) != color)
3464 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3465 color, npages, m, m->phys_page & vm_color_mask, color);
3466 ++npages;
3467 prev_m = m;
3468 }
3469 if (look_for_page != VM_PAGE_NULL) {
3470 unsigned int other_color;
3471
3472 if (expect_page && !found_page) {
3473 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3474 color, npages, look_for_page, look_for_page->phys_page);
3475 _vm_page_print(look_for_page);
3476 for (other_color = 0;
3477 other_color < vm_colors;
3478 other_color++) {
3479 if (other_color == color)
3480 continue;
3481 vm_page_verify_free_list(other_color, look_for_page, FALSE);
3482 }
3483 panic("vm_page_verify_free_list(color=%u)\n", color);
3484 }
3485 if (!expect_page && found_page) {
3486 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3487 color, npages, look_for_page, look_for_page->phys_page);
3488 }
3489 }
3490 return npages;
3491 }
3492
3493 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3494 static void
3495 vm_page_verify_free_lists( void )
3496 {
3497 unsigned int color, npages;
3498
3499 if (! vm_page_verify_free_lists_enabled)
3500 return;
3501
3502 npages = 0;
3503
3504 lck_mtx_lock(&vm_page_queue_free_lock);
3505
3506 for( color = 0; color < vm_colors; color++ ) {
3507 npages += vm_page_verify_free_list(color, VM_PAGE_NULL, FALSE);
3508 }
3509 if (npages != vm_page_free_count)
3510 panic("vm_page_verify_free_lists: npages %u free_count %d",
3511 npages, vm_page_free_count);
3512
3513 lck_mtx_unlock(&vm_page_queue_free_lock);
3514 }
3515
3516 void
3517 vm_page_queues_assert(
3518 vm_page_t mem,
3519 int val)
3520 {
3521 if (mem->free + mem->active + mem->inactive + mem->speculative +
3522 mem->throttled + mem->pageout_queue > (val)) {
3523 _vm_page_print(mem);
3524 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3525 }
3526 if (VM_PAGE_WIRED(mem)) {
3527 assert(!mem->active);
3528 assert(!mem->inactive);
3529 assert(!mem->speculative);
3530 assert(!mem->throttled);
3531 }
3532 }
3533 #endif /* MACH_ASSERT */
3534
3535
3536 /*
3537 * CONTIGUOUS PAGE ALLOCATION
3538 *
3539 * Find a region large enough to contain at least n pages
3540 * of contiguous physical memory.
3541 *
3542 * This is done by traversing the vm_page_t array in a linear fashion...
3543 * we assume that the vm_page_t array has the available physical pages in an
3544 * ordered, ascending list... this is currently true of all our implementations
3545 * and must remain so... there can be 'holes' in the array... we also can
3546 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
3547 * which used to happen via 'vm_page_convert'... that function was no longer
3548 * being called and was removed...
3549 *
3550 * The basic flow consists of stabilizing some of the interesting state of
3551 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3552 * sweep at the beginning of the array looking for pages that meet our criteria
3553 * for a 'stealable' page... currently we are pretty conservative... if the page
3554 * meets these criteria and is physically contiguous to the previous page in the 'run'
3555 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3556 * and start to develop a new run... if at this point we've already considered
3557 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3558 * and mutex_pause (which will yield the processor), to keep the latency low with respect
3559 * to other threads trying to acquire free pages (or move pages from q to q),
3560 * and then continue from the spot we left off... we only make 1 pass through the
3561 * array. Once we have a 'run' that is long enough, we'll go into the loop
3562 * which steals the pages from the queues they're currently on... pages on the free
3563 * queue can be stolen directly... pages that are on any of the other queues
3564 * must be removed from the object they are tabled on... this requires taking the
3565 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3566 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3567 * dump the pages we've currently stolen back to the free list, and pick up our
3568 * scan from the point where we aborted the 'current' run.
3569 *
3570 *
3571 * Requirements:
3572 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3573 *
3574 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3575 *
3576 * Algorithm:
3577 */
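/*
 * Parameter sketch (illustrative, based on the checks below): contig_pages
 * is the run length in pages, max_pnum (if non-zero) caps the highest
 * physical page number we will accept, and pnum_mask constrains the
 * alignment of the first page of a run: a run only starts on a page
 * whose physical page number has none of the pnum_mask bits set.  For
 * example, with 4K pages a caller wanting a run aligned on a 64K
 * physical boundary would pass pnum_mask = 0xF.
 */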
3578
3579 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3580
3581
3582 #define RESET_STATE_OF_RUN() \
3583 MACRO_BEGIN \
3584 prevcontaddr = -2; \
3585 start_pnum = -1; \
3586 free_considered = 0; \
3587 substitute_needed = 0; \
3588 npages = 0; \
3589 MACRO_END
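/*
 * Note: prevcontaddr is reset to -2 rather than -1, presumably so that
 * prevcontaddr + 1 is not 0 and physical page 0 cannot be mistaken for
 * the continuation of a (nonexistent) previous run.
 */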
3590
3591 /*
3592 * Can we steal in-use (i.e. not free) pages when searching for
3593 * physically-contiguous pages ?
3594 */
3595 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3596
3597 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3598 #if DEBUG
3599 int vm_page_find_contig_debug = 0;
3600 #endif
3601
3602 static vm_page_t
3603 vm_page_find_contiguous(
3604 unsigned int contig_pages,
3605 ppnum_t max_pnum,
3606 ppnum_t pnum_mask,
3607 boolean_t wire,
3608 int flags)
3609 {
3610 vm_page_t m = NULL;
3611 ppnum_t prevcontaddr;
3612 ppnum_t start_pnum;
3613 unsigned int npages, considered, scanned;
3614 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3615 unsigned int idx_last_contig_page_found = 0;
3616 int free_considered, free_available;
3617 int substitute_needed;
3618 boolean_t wrapped;
3619 #if DEBUG
3620 clock_sec_t tv_start_sec, tv_end_sec;
3621 clock_usec_t tv_start_usec, tv_end_usec;
3622 #endif
3623 #if MACH_ASSERT
3624 int yielded = 0;
3625 int dumped_run = 0;
3626 int stolen_pages = 0;
3627 #endif
3628
3629 if (contig_pages == 0)
3630 return VM_PAGE_NULL;
3631
3632 #if MACH_ASSERT
3633 vm_page_verify_free_lists();
3634 #endif
3635 #if DEBUG
3636 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3637 #endif
3638 vm_page_lock_queues();
3639 lck_mtx_lock(&vm_page_queue_free_lock);
3640
3641 RESET_STATE_OF_RUN();
3642
3643 scanned = 0;
3644 considered = 0;
3645 free_available = vm_page_free_count - vm_page_free_reserved;
3646
3647 wrapped = FALSE;
3648
3649 if(flags & KMA_LOMEM)
3650 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3651 else
3652 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3653
3654 orig_last_idx = idx_last_contig_page_found;
3655 last_idx = orig_last_idx;
3656
3657 for (page_idx = last_idx, start_idx = last_idx;
3658 npages < contig_pages && page_idx < vm_pages_count;
3659 page_idx++) {
3660 retry:
3661 if (wrapped &&
3662 npages == 0 &&
3663 page_idx >= orig_last_idx) {
3664 /*
3665 * We're back where we started and we haven't
3666 * found any suitable contiguous range. Let's
3667 * give up.
3668 */
3669 break;
3670 }
3671 scanned++;
3672 m = &vm_pages[page_idx];
3673
3674 assert(!m->fictitious);
3675 assert(!m->private);
3676
3677 if (max_pnum && m->phys_page > max_pnum) {
3678 /* no more low pages... */
3679 break;
3680 }
3681 if ( !(flags & KMA_LOMEM) && m->phys_page <= vm_lopage_poolend &&
3682 m->phys_page >= vm_lopage_poolstart) {
3683 /*
3684 * don't want to take pages from our
3685 * reserved pool of low memory
3686 * so don't consider it which
3687 * means starting a new run
3688 */
3689 RESET_STATE_OF_RUN();
3690
3691 } else if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3692 /*
3693 * not aligned
3694 */
3695 RESET_STATE_OF_RUN();
3696
3697 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3698 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3699 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3700 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
3701 m->pageout) {
3702 /*
3703 * page is in a transient state
3704 * or a state we don't want to deal
3705 * with, so don't consider it which
3706 * means starting a new run
3707 */
3708 RESET_STATE_OF_RUN();
3709
3710 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3711 /*
3712 * page needs to be on one of our queues
3713 * in order for it to be stable behind the
3714 * locks we hold at this point...
3715 * if not, don't consider it which
3716 * means starting a new run
3717 */
3718 RESET_STATE_OF_RUN();
3719
3720 } else if (!m->free && (!m->tabled || m->busy)) {
3721 /*
3722 * pages on the free list are always 'busy'
3723 * so we couldn't test for 'busy' in the check
3724 * for the transient states... pages that are
3725 * 'free' are never 'tabled', so we also couldn't
3726 * test for 'tabled'. So we check here to make
3727 * sure that a non-free page is not busy and is
3728 * tabled on an object...
3729 * if not, don't consider it which
3730 * means starting a new run
3731 */
3732 RESET_STATE_OF_RUN();
3733
3734 } else {
3735 if (m->phys_page != prevcontaddr + 1) {
3736 if ((m->phys_page & pnum_mask) != 0) {
3737 RESET_STATE_OF_RUN();
3738 goto did_consider;
3739 } else {
3740 npages = 1;
3741 start_idx = page_idx;
3742 start_pnum = m->phys_page;
3743 }
3744 } else {
3745 npages++;
3746 }
3747 prevcontaddr = m->phys_page;
3748
3749 VM_PAGE_CHECK(m);
3750 if (m->free) {
3751 free_considered++;
3752 } else {
3753 /*
3754 * This page is not free.
3755 * If we can't steal used pages,
3756 * we have to give up this run
3757 * and keep looking.
3758 * Otherwise, we might need to
3759 * move the contents of this page
3760 * into a substitute page.
3761 */
3762 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3763 if (m->pmapped || m->dirty) {
3764 substitute_needed++;
3765 }
3766 #else
3767 RESET_STATE_OF_RUN();
3768 #endif
3769 }
3770
3771 if ((free_considered + substitute_needed) > free_available) {
3772 /*
3773 * if we let this run continue
3774 * we will end up dropping the vm_page_free_count
3775 * below the reserve limit... we need to abort
3776 * this run, but we can at least re-consider this
3777 * page... thus the jump back to 'retry'
3778 */
3779 RESET_STATE_OF_RUN();
3780
3781 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3782 considered++;
3783 goto retry;
3784 }
3785 /*
3786 * free_available == 0
3787 * so can't consider any free pages... if
3788 * we went to retry in this case, we'd
3789 * get stuck looking at the same page
3790 * w/o making any forward progress
3791 * we also want to take this path if we've already
3792 * reached our limit that controls the lock latency
3793 */
3794 }
3795 }
3796 did_consider:
3797 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3798
3799 lck_mtx_unlock(&vm_page_queue_free_lock);
3800 vm_page_unlock_queues();
3801
3802 mutex_pause(0);
3803
3804 vm_page_lock_queues();
3805 lck_mtx_lock(&vm_page_queue_free_lock);
3806
3807 RESET_STATE_OF_RUN();
3808 /*
3809 * reset our free page limit since we
3810 * dropped the lock protecting the vm_page_free_queue
3811 */
3812 free_available = vm_page_free_count - vm_page_free_reserved;
3813 considered = 0;
3814 #if MACH_ASSERT
3815 yielded++;
3816 #endif
3817 goto retry;
3818 }
3819 considered++;
3820 }
3821 m = VM_PAGE_NULL;
3822
3823 if (npages != contig_pages) {
3824 if (!wrapped) {
3825 /*
3826 * We didn't find a contiguous range but we didn't
3827 * start from the very first page.
3828 * Start again from the very first page.
3829 */
3830 RESET_STATE_OF_RUN();
3831 if( flags & KMA_LOMEM)
3832 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3833 else
3834 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3835 last_idx = 0;
3836 page_idx = last_idx;
3837 wrapped = TRUE;
3838 goto retry;
3839 }
3840 lck_mtx_unlock(&vm_page_queue_free_lock);
3841 } else {
3842 vm_page_t m1;
3843 vm_page_t m2;
3844 unsigned int cur_idx;
3845 unsigned int tmp_start_idx;
3846 vm_object_t locked_object = VM_OBJECT_NULL;
3847 boolean_t abort_run = FALSE;
3848
3849 assert(page_idx - start_idx == contig_pages);
3850
3851 tmp_start_idx = start_idx;
3852
3853 /*
3854 * first pass through to pull the free pages
3855 * off of the free queue so that in case we
3856 * need substitute pages, we won't grab any
3857 * of the free pages in the run... we clear
3858 * the 'free' bit here in the 1st pass, and even in
3859 * an abort_run case, we'll collect all of the
3860 * free pages in this run and return them to the free list
3861 */
3862 while (start_idx < page_idx) {
3863
3864 m1 = &vm_pages[start_idx++];
3865
3866 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3867 assert(m1->free);
3868 #endif
3869
3870 if (m1->free) {
3871 unsigned int color;
3872
3873 color = m1->phys_page & vm_color_mask;
3874 #if MACH_ASSERT
3875 vm_page_verify_free_list(color, m1, TRUE);
3876 #endif
3877 queue_remove(&vm_page_queue_free[color],
3878 m1,
3879 vm_page_t,
3880 pageq);
3881 m1->pageq.next = NULL;
3882 m1->pageq.prev = NULL;
3883 #if MACH_ASSERT
3884 vm_page_verify_free_list(color, VM_PAGE_NULL, FALSE);
3885 #endif
3886 /*
3887 * Clear the "free" bit so that this page
3888 * does not get considered for another
3889 * concurrent physically-contiguous allocation.
3890 */
3891 m1->free = FALSE;
3892 assert(m1->busy);
3893
3894 vm_page_free_count--;
3895 }
3896 }
3897 /*
3898 * adjust global freelist counts
3899 */
3900 if (vm_page_free_count < vm_page_free_count_minimum)
3901 vm_page_free_count_minimum = vm_page_free_count;
3902
3903 if( flags & KMA_LOMEM)
3904 vm_page_lomem_find_contiguous_last_idx = page_idx;
3905 else
3906 vm_page_find_contiguous_last_idx = page_idx;
3907
3908 /*
3909 * we can drop the free queue lock at this point since
3910 * we've pulled any 'free' candidates off of the list
3911 * we need it dropped so that we can do a vm_page_grab
3912 * when substituting for pmapped/dirty pages
3913 */
3914 lck_mtx_unlock(&vm_page_queue_free_lock);
3915
3916 start_idx = tmp_start_idx;
3917 cur_idx = page_idx - 1;
3918
3919 while (start_idx++ < page_idx) {
3920 /*
3921 * must go through the list from back to front
3922 * so that the page list is created in the
3923 * correct order - low -> high phys addresses
3924 */
3925 m1 = &vm_pages[cur_idx--];
3926
3927 assert(!m1->free);
3928 if (m1->object == VM_OBJECT_NULL) {
3929 /*
3930 * page has already been removed from
3931 * the free list in the 1st pass
3932 */
3933 assert(m1->offset == (vm_object_offset_t) -1);
3934 assert(m1->busy);
3935 assert(!m1->wanted);
3936 assert(!m1->laundry);
3937 } else {
3938 vm_object_t object;
3939
3940 if (abort_run == TRUE)
3941 continue;
3942
3943 object = m1->object;
3944
3945 if (object != locked_object) {
3946 if (locked_object) {
3947 vm_object_unlock(locked_object);
3948 locked_object = VM_OBJECT_NULL;
3949 }
3950 if (vm_object_lock_try(object))
3951 locked_object = object;
3952 }
3953 if (locked_object == VM_OBJECT_NULL ||
3954 (VM_PAGE_WIRED(m1) || m1->gobbled ||
3955 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3956 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3957 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3958
3959 if (locked_object) {
3960 vm_object_unlock(locked_object);
3961 locked_object = VM_OBJECT_NULL;
3962 }
3963 tmp_start_idx = cur_idx;
3964 abort_run = TRUE;
3965 continue;
3966 }
3967 if (m1->pmapped || m1->dirty) {
3968 int refmod;
3969 vm_object_offset_t offset;
3970
3971 m2 = vm_page_grab();
3972
3973 if (m2 == VM_PAGE_NULL) {
3974 if (locked_object) {
3975 vm_object_unlock(locked_object);
3976 locked_object = VM_OBJECT_NULL;
3977 }
3978 tmp_start_idx = cur_idx;
3979 abort_run = TRUE;
3980 continue;
3981 }
3982 if (m1->pmapped)
3983 refmod = pmap_disconnect(m1->phys_page);
3984 else
3985 refmod = 0;
3986 vm_page_copy(m1, m2);
3987
3988 m2->reference = m1->reference;
3989 m2->dirty = m1->dirty;
3990
3991 if (refmod & VM_MEM_REFERENCED)
3992 m2->reference = TRUE;
3993 if (refmod & VM_MEM_MODIFIED)
3994 m2->dirty = TRUE;
3995 offset = m1->offset;
3996
3997 /*
3998 * completely cleans up the state
3999 * of the page so that it is ready
4000 * to be put onto the free list or,
4001 * for our purposes here, to look as if it
4002 * had just come off of the free list
4003 */
4004 vm_page_free_prepare(m1);
4005
4006 /*
4007 * make sure we clear the ref/mod state
4008 * from the pmap layer... else we risk
4009 * inheriting state from the last time
4010 * this page was used...
4011 */
4012 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4013 /*
4014 * now put the substitute page on the object
4015 */
4016 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);
4017
4018 if (m2->reference)
4019 vm_page_activate(m2);
4020 else
4021 vm_page_deactivate(m2);
4022
4023 PAGE_WAKEUP_DONE(m2);
4024
4025 } else {
4026 /*
4027 * completely cleans up the state
4028 * of the page so that it is ready
4029 * to be put onto the free list or,
4030 * for our purposes here, to look as if it
4031 * had just come off of the free list
4032 */
4033 vm_page_free_prepare(m1);
4034 }
4035 #if MACH_ASSERT
4036 stolen_pages++;
4037 #endif
4038 }
4039 m1->pageq.next = (queue_entry_t) m;
4040 m1->pageq.prev = NULL;
4041 m = m1;
4042 }
4043 if (locked_object) {
4044 vm_object_unlock(locked_object);
4045 locked_object = VM_OBJECT_NULL;
4046 }
4047
4048 if (abort_run == TRUE) {
4049 if (m != VM_PAGE_NULL) {
4050 vm_page_free_list(m, FALSE);
4051 }
4052 #if MACH_ASSERT
4053 dumped_run++;
4054 #endif
4055 /*
4056 * tmp_start_idx was left 1 below the page that
4057 * stopped this run (because of the auto-decrement
4058 * on use), so add 1 to get back to that page
4059 * and 1 more so the scan resumes just past it
4060 * on the retry
4061 */
4062 page_idx = tmp_start_idx + 2;
4063 if (page_idx >= vm_pages_count) {
4064 if (wrapped)
4065 goto done_scanning;
4066 page_idx = last_idx = 0;
4067 wrapped = TRUE;
4068 }
4069 abort_run = FALSE;
4070
4071 /*
4072 * We couldn't complete this run, so resume the
4073 * scan just past the page that stopped us (or
4074 * from the first page if we've wrapped around).
4075 */
4076 RESET_STATE_OF_RUN();
4077
4078 if( flags & KMA_LOMEM)
4079 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4080 else
4081 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4082
4083 last_idx = page_idx;
4084
4085 lck_mtx_lock(&vm_page_queue_free_lock);
4086 /*
4087 * reset our free page limit since we
4088 * dropped the lock protecting the vm_page_free_queue
4089 */
4090 free_available = vm_page_free_count - vm_page_free_reserved;
4091 goto retry;
4092 }
4093
4094 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4095
4096 if (wire == TRUE)
4097 m1->wire_count++;
4098 else
4099 m1->gobbled = TRUE;
4100 }
4101 if (wire == FALSE)
4102 vm_page_gobble_count += npages;
4103
4104 /*
4105 * gobbled pages are also counted as wired pages
4106 */
4107 vm_page_wire_count += npages;
4108
4109 assert(vm_page_verify_contiguous(m, npages));
4110 }
4111 done_scanning:
4112 vm_page_unlock_queues();
4113
4114 #if DEBUG
4115 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4116
4117 tv_end_sec -= tv_start_sec;
4118 if (tv_end_usec < tv_start_usec) {
4119 tv_end_sec--;
4120 tv_end_usec += 1000000;
4121 }
4122 tv_end_usec -= tv_start_usec;
4123 if (tv_end_usec >= 1000000) {
4124 tv_end_sec++;
4125 tv_end_usec -= 1000000;
4126 }
4127 if (vm_page_find_contig_debug) {
4128 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4129 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4130 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4131 scanned, yielded, dumped_run, stolen_pages);
4132 }
4133
4134 #endif
4135 #if MACH_ASSERT
4136 vm_page_verify_free_lists();
4137 #endif
4138 return m;
4139 }
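/*
 * Editorial note: the run detection in vm_page_find_contiguous() above
 * reduces to a small state machine over physical page numbers.  The
 * sketch below is an illustrative distillation only; it is not part of
 * the original source, and the helper name and its array-of-pnums input
 * are hypothetical.  A run only grows while page numbers stay strictly
 * consecutive, and a new run may only start on a page whose number has
 * all of the pnum_mask bits clear (i.e. on the requested alignment).
 */
#if 0	/* illustrative sketch, not compiled */
static unsigned int
contig_run_sketch(const ppnum_t *pnums, unsigned int count,
		  unsigned int want, ppnum_t pnum_mask)
{
	unsigned int	npages = 0;		/* length of the current run */
	unsigned int	start_idx = 0;		/* index where the run began */
	ppnum_t		prevcontaddr = 0;	/* last page number examined */
	unsigned int	i;

	for (i = 0; i < count && npages < want; i++) {
		if (npages > 0 && pnums[i] == prevcontaddr + 1) {
			/* physically contiguous with the current run */
			npages++;
		} else if ((pnums[i] & pnum_mask) == 0) {
			/* run broken, but this page is suitably aligned:
			 * start a fresh run here */
			npages = 1;
			start_idx = i;
		} else {
			/* run broken and misaligned: no run can start here */
			npages = 0;
		}
		prevcontaddr = pnums[i];
	}
	return (npages == want) ? start_idx : count;	/* count => not found */
}
#endif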
4140
4141 /*
4142 * Allocate a list of contiguous, wired pages.
4143 */
4144 kern_return_t
4145 cpm_allocate(
4146 vm_size_t size,
4147 vm_page_t *list,
4148 ppnum_t max_pnum,
4149 ppnum_t pnum_mask,
4150 boolean_t wire,
4151 int flags)
4152 {
4153 vm_page_t pages;
4154 unsigned int npages;
4155
4156 if (size % page_size != 0)
4157 return KERN_INVALID_ARGUMENT;
4158
4159 npages = (unsigned int) (size / PAGE_SIZE);
4160 if (npages != size / PAGE_SIZE) {
4161 /* 32-bit overflow */
4162 return KERN_INVALID_ARGUMENT;
4163 }
4164
4165 /*
4166 * Obtain a pointer to a subset of the free
4167 * list large enough to satisfy the request;
4168 * the region will be physically contiguous.
4169 */
4170 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4171
4172 if (pages == VM_PAGE_NULL)
4173 return KERN_NO_SPACE;
4174 /*
4175 * determine need for wakeups
4176 */
4177 if ((vm_page_free_count < vm_page_free_min) ||
4178 ((vm_page_free_count < vm_page_free_target) &&
4179 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4180 thread_wakeup((event_t) &vm_page_free_wanted);
4181
4182 #if CONFIG_EMBEDDED
4183 {
4184 int percent_avail;
4185
4186 /*
4187 * Decide if we need to poke the memorystatus notification thread.
4188 */
4189 percent_avail =
4190 (vm_page_active_count + vm_page_inactive_count +
4191 vm_page_speculative_count + vm_page_free_count +
4192 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
4193 atop_64(max_mem);
4194 if (percent_avail <= (kern_memorystatus_level - 5)) {
4195 kern_memorystatus_level = percent_avail;
4196 thread_wakeup((event_t)&kern_memorystatus_wakeup);
4197 }
4198 }
4199 #endif
4200 /*
4201 * The CPM pages should now be available and
4202 * ordered by ascending physical address.
4203 */
4204 assert(vm_page_verify_contiguous(pages, npages));
4205
4206 *list = pages;
4207 return KERN_SUCCESS;
4208 }
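/*
 * Editorial note: a minimal, hypothetical caller of cpm_allocate() for
 * illustration; it is not part of the original source.  The wrapper name
 * and the DMA-style constraints (pages below 4GB, run starting on a 64KB
 * physical boundary) are assumptions chosen for the example.
 */
#if 0	/* illustrative sketch, not compiled */
static kern_return_t
example_grab_contig_buffer(vm_size_t size, vm_page_t *page_list_out)
{
	kern_return_t	kr;

	/*
	 * max_pnum  : highest acceptable physical page number (below 4GB here).
	 * pnum_mask : bits that must be clear in the first page's number,
	 *             i.e. the run starts on a 64KB physical boundary.
	 * wire=TRUE : the pages come back wired rather than gobbled.
	 * flags     : 0, or KMA_LOMEM for low-memory requests (as tested above).
	 */
	kr = cpm_allocate(size,
			  page_list_out,
			  (ppnum_t)((0x100000000ULL >> PAGE_SHIFT) - 1),
			  (ppnum_t)((0x10000 >> PAGE_SHIFT) - 1),
			  TRUE,
			  0);

	if (kr != KERN_SUCCESS)
		*page_list_out = VM_PAGE_NULL;
	return kr;
}
#endif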
4209
4210 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4211
4212 static vm_page_t hibernate_gobble_queue;
4213
4214 static void
4215 hibernate_page_list_zero(hibernate_page_list_t *list)
4216 {
4217 uint32_t bank;
4218 hibernate_bitmap_t * bitmap;
4219
4220 bitmap = &list->bank_bitmap[0];
4221 for (bank = 0; bank < list->bank_count; bank++)
4222 {
4223 uint32_t last_bit;
4224
4225 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4226 // set out-of-bound bits at end of bitmap.
4227 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4228 if (last_bit)
4229 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4230
4231 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4232 }
4233 }
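/*
 * Editorial note -- worked example of the trailing-bit fixup above,
 * under the assumption that pages are numbered MSB-first within each
 * 32-bit bitmap word: a bank covering 40 pages has bitmapwords = 2 and
 * last_bit = 40 & 31 = 8, so the final word is set to
 * 0xFFFFFFFF >> 8 = 0x00FFFFFF.  That pre-sets the 24 bit positions
 * that lie past the bank's last page; since a zero bit means "this page
 * must be saved", the out-of-range positions are thereby marked as not
 * needing to be saved.
 */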
4234
4235 void
4236 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4237 {
4238 uint32_t i;
4239 vm_page_t m;
4240 uint64_t start, end, timeout, nsec;
4241 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4242 clock_get_uptime(&start);
4243
4244 for (i = 0; i < gobble_count; i++)
4245 {
4246 while (VM_PAGE_NULL == (m = vm_page_grab()))
4247 {
4248 clock_get_uptime(&end);
4249 if (end >= timeout)
4250 break;
4251 VM_PAGE_WAIT();
4252 }
4253 if (!m)
4254 break;
4255 m->busy = FALSE;
4256 vm_page_gobble(m);
4257
4258 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4259 hibernate_gobble_queue = m;
4260 }
4261
4262 clock_get_uptime(&end);
4263 absolutetime_to_nanoseconds(end - start, &nsec);
4264 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4265 }
4266
4267 void
4268 hibernate_free_gobble_pages(void)
4269 {
4270 vm_page_t m, next;
4271 uint32_t count = 0;
4272
4273 m = (vm_page_t) hibernate_gobble_queue;
4274 while(m)
4275 {
4276 next = (vm_page_t) m->pageq.next;
4277 vm_page_free(m);
4278 count++;
4279 m = next;
4280 }
4281 hibernate_gobble_queue = VM_PAGE_NULL;
4282
4283 if (count)
4284 HIBLOG("Freed %d pages\n", count);
4285 }
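/*
 * Editorial note: the two routines above are intended to be used as a
 * pair.  The sketch below is illustrative only; the wrapper name and
 * the 30% / 20-second policy values are hypothetical, not taken from
 * the original source.  Note that hibernate_gobble_pages() takes its
 * time limit in milliseconds (see the scale factor it passes to
 * clock_interval_to_deadline() above).
 */
#if 0	/* illustrative sketch, not compiled */
static void
example_gobble_for_hibernate(boolean_t entering)
{
	if (entering) {
		/*
		 * Pull roughly 30% of the currently free pages onto the
		 * private gobble queue, waiting at most 20 seconds for
		 * them.  Grabbed pages stay off the free list until
		 * hibernate_free_gobble_pages() returns them.
		 */
		hibernate_gobble_pages(vm_page_free_count * 30 / 100,
				       20 * 1000 /* ms */);
	} else {
		/* on wake (or on failure), give every gobbled page back */
		hibernate_free_gobble_pages();
	}
}
#endif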
4286
4287 static boolean_t
4288 hibernate_consider_discard(vm_page_t m)
4289 {
4290 vm_object_t object = NULL;
4291 int refmod_state;
4292 boolean_t discard = FALSE;
4293
4294 do
4295 {
4296 if(m->private)
4297 panic("hibernate_consider_discard: private");
4298
4299 if (!vm_object_lock_try(m->object))
4300 break;
4301
4302 object = m->object;
4303
4304 if (VM_PAGE_WIRED(m))
4305 break;
4306 if (m->precious)
4307 break;
4308
4309 if (m->busy || !object->alive)
4310 /*
4311 * Somebody is playing with this page.
4312 */
4313 break;
4314
4315 if (m->absent || m->unusual || m->error)
4316 /*
4317 * If it's unusual in any way, ignore it
4318 */
4319 break;
4320
4321 if (m->cleaning)
4322 break;
4323
4324 if (m->laundry || m->list_req_pending)
4325 break;
4326
4327 if (!m->dirty)
4328 {
4329 refmod_state = pmap_get_refmod(m->phys_page);
4330
4331 if (refmod_state & VM_MEM_REFERENCED)
4332 m->reference = TRUE;
4333 if (refmod_state & VM_MEM_MODIFIED)
4334 m->dirty = TRUE;
4335 }
4336
4337 /*
4338 * If it's clean or purgeable we can discard the page on wakeup.
4339 */
4340 discard = (!m->dirty)
4341 || (VM_PURGABLE_VOLATILE == object->purgable)
4342 || (VM_PURGABLE_EMPTY == m->object->purgable);
4343 }
4344 while (FALSE);
4345
4346 if (object)
4347 vm_object_unlock(object);
4348
4349 return (discard);
4350 }
4351
4352
4353 static void
4354 hibernate_discard_page(vm_page_t m)
4355 {
4356 if (m->absent || m->unusual || m->error)
4357 /*
4358 * If it's unusual in any way, ignore it
4359 */
4360 return;
4361
4362 if (m->pmapped == TRUE)
4363 {
4364 __unused int refmod_state = pmap_disconnect(m->phys_page);
4365 }
4366
4367 if (m->laundry)
4368 panic("hibernate_discard_page(%p) laundry", m);
4369 if (m->private)
4370 panic("hibernate_discard_page(%p) private", m);
4371 if (m->fictitious)
4372 panic("hibernate_discard_page(%p) fictitious", m);
4373
4374 if (VM_PURGABLE_VOLATILE == m->object->purgable)
4375 {
4376 /* object should be on a queue */
4377 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
4378 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
4379 assert(old_queue);
4380 /* No need to lock page queue for token delete, hibernate_vm_unlock()
4381 makes sure these locks are uncontended before sleep */
4382 vm_purgeable_token_delete_first(old_queue);
4383 m->object->purgable = VM_PURGABLE_EMPTY;
4384 }
4385
4386 vm_page_free(m);
4387 }
4388
4389 /*
4390 A zero bit in the bitmaps => the page needs to be saved. All pages default to being saved;
4391 pages known to the VM not to need saving are subtracted.
4392 Wired pages to be saved are present in page_list_wired, pageable ones in page_list.
4393 */
4394
4395 void
4396 hibernate_page_list_setall(hibernate_page_list_t * page_list,
4397 hibernate_page_list_t * page_list_wired,
4398 uint32_t * pagesOut)
4399 {
4400 uint64_t start, end, nsec;
4401 vm_page_t m;
4402 uint32_t pages = page_list->page_count;
4403 uint32_t count_zf = 0, count_throttled = 0;
4404 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
4405 uint32_t count_wire = pages;
4406 uint32_t count_discard_active = 0;
4407 uint32_t count_discard_inactive = 0;
4408 uint32_t count_discard_purgeable = 0;
4409 uint32_t count_discard_speculative = 0;
4410 uint32_t i;
4411 uint32_t bank;
4412 hibernate_bitmap_t * bitmap;
4413 hibernate_bitmap_t * bitmap_wired;
4414
4415
4416 HIBLOG("hibernate_page_list_setall start\n");
4417
4418 clock_get_uptime(&start);
4419
4420 hibernate_page_list_zero(page_list);
4421 hibernate_page_list_zero(page_list_wired);
4422
4423 if (vm_page_local_q) {
4424 for (i = 0; i < vm_page_local_q_count; i++)
4425 vm_page_reactivate_local(i, TRUE, TRUE);
4426 }
4427
4428 m = (vm_page_t) hibernate_gobble_queue;
4429 while(m)
4430 {
4431 pages--;
4432 count_wire--;
4433 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4434 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4435 m = (vm_page_t) m->pageq.next;
4436 }
4437
4438 for( i = 0; i < vm_colors; i++ )
4439 {
4440 queue_iterate(&vm_page_queue_free[i],
4441 m,
4442 vm_page_t,
4443 pageq)
4444 {
4445 pages--;
4446 count_wire--;
4447 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4448 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4449 }
4450 }
4451
4452 queue_iterate(&vm_lopage_queue_free,
4453 m,
4454 vm_page_t,
4455 pageq)
4456 {
4457 pages--;
4458 count_wire--;
4459 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4460 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4461 }
4462
4463 queue_iterate( &vm_page_queue_throttled,
4464 m,
4465 vm_page_t,
4466 pageq )
4467 {
4468 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
4469 && hibernate_consider_discard(m))
4470 {
4471 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4472 count_discard_inactive++;
4473 }
4474 else
4475 count_throttled++;
4476 count_wire--;
4477 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4478 }
4479
4480 queue_iterate( &vm_page_queue_zf,
4481 m,
4482 vm_page_t,
4483 pageq )
4484 {
4485 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
4486 && hibernate_consider_discard(m))
4487 {
4488 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4489 if (m->dirty)
4490 count_discard_purgeable++;
4491 else
4492 count_discard_inactive++;
4493 }
4494 else
4495 count_zf++;
4496 count_wire--;
4497 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4498 }
4499
4500 queue_iterate( &vm_page_queue_inactive,
4501 m,
4502 vm_page_t,
4503 pageq )
4504 {
4505 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
4506 && hibernate_consider_discard(m))
4507 {
4508 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4509 if (m->dirty)
4510 count_discard_purgeable++;
4511 else
4512 count_discard_inactive++;
4513 }
4514 else
4515 count_inactive++;
4516 count_wire--;
4517 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4518 }
4519
4520 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
4521 {
4522 queue_iterate(&vm_page_queue_speculative[i].age_q,
4523 m,
4524 vm_page_t,
4525 pageq)
4526 {
4527 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
4528 && hibernate_consider_discard(m))
4529 {
4530 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4531 count_discard_speculative++;
4532 }
4533 else
4534 count_speculative++;
4535 count_wire--;
4536 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4537 }
4538 }
4539
4540 queue_iterate( &vm_page_queue_active,
4541 m,
4542 vm_page_t,
4543 pageq )
4544 {
4545 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
4546 && hibernate_consider_discard(m))
4547 {
4548 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4549 if (m->dirty)
4550 count_discard_purgeable++;
4551 else
4552 count_discard_active++;
4553 }
4554 else
4555 count_active++;
4556 count_wire--;
4557 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4558 }
4559
4560 // pull wired from hibernate_bitmap
4561
4562 bitmap = &page_list->bank_bitmap[0];
4563 bitmap_wired = &page_list_wired->bank_bitmap[0];
4564 for (bank = 0; bank < page_list->bank_count; bank++)
4565 {
4566 for (i = 0; i < bitmap->bitmapwords; i++)
4567 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
4568 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
4569 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
4570 }
4571
4572 // machine dependent adjustments
4573 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
4574
4575 clock_get_uptime(&end);
4576 absolutetime_to_nanoseconds(end - start, &nsec);
4577 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
4578
4579 HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
4580 pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
4581 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
4582
4583 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
4584 }
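/*
 * Editorial note: an illustrative consumer of the bitmaps built by
 * hibernate_page_list_setall() above.  It re-derives the "pages to
 * save" count by walking every bank and counting zero bits (a zero bit
 * means the page must be saved).  The function name is hypothetical and
 * the routine is not part of the original source.
 */
#if 0	/* illustrative sketch, not compiled */
static uint32_t
example_count_pages_to_save(hibernate_page_list_t * page_list)
{
	hibernate_bitmap_t * bitmap;
	uint32_t bank, i, bit;
	uint32_t to_save = 0;

	bitmap = &page_list->bank_bitmap[0];
	for (bank = 0; bank < page_list->bank_count; bank++)
	{
		for (i = 0; i < bitmap->bitmapwords; i++)
		{
			uint32_t word = bitmap->bitmap[i];

			/* out-of-range trailing bits were pre-set by
			   hibernate_page_list_zero(), so they are never
			   counted here */
			for (bit = 0; bit < 32; bit++)
				if (!(word & (1u << bit)))
					to_save++;
		}
		bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
	}
	return (to_save);
}
#endif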
4585
4586 void
4587 hibernate_page_list_discard(hibernate_page_list_t * page_list)
4588 {
4589 uint64_t start, end, nsec;
4590 vm_page_t m;
4591 vm_page_t next;
4592 uint32_t i;
4593 uint32_t count_discard_active = 0;
4594 uint32_t count_discard_inactive = 0;
4595 uint32_t count_discard_purgeable = 0;
4596 uint32_t count_discard_speculative = 0;
4597
4598 clock_get_uptime(&start);
4599
4600 m = (vm_page_t) queue_first(&vm_page_queue_zf);
4601 while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
4602 {
4603 next = (vm_page_t) m->pageq.next;
4604 if (hibernate_page_bittst(page_list, m->phys_page))
4605 {
4606 if (m->dirty)
4607 count_discard_purgeable++;
4608 else
4609 count_discard_inactive++;
4610 hibernate_discard_page(m);
4611 }
4612 m = next;
4613 }
4614
4615 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
4616 {
4617 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
4618 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
4619 {
4620 next = (vm_page_t) m->pageq.next;
4621 if (hibernate_page_bittst(page_list, m->phys_page))
4622 {
4623 count_discard_speculative++;
4624 hibernate_discard_page(m);
4625 }
4626 m = next;
4627 }
4628 }
4629
4630 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
4631 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
4632 {
4633 next = (vm_page_t) m->pageq.next;
4634 if (hibernate_page_bittst(page_list, m->phys_page))
4635 {
4636 if (m->dirty)
4637 count_discard_purgeable++;
4638 else
4639 count_discard_inactive++;
4640 hibernate_discard_page(m);
4641 }
4642 m = next;
4643 }
4644
4645 m = (vm_page_t) queue_first(&vm_page_queue_active);
4646 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
4647 {
4648 next = (vm_page_t) m->pageq.next;
4649 if (hibernate_page_bittst(page_list, m->phys_page))
4650 {
4651 if (m->dirty)
4652 count_discard_purgeable++;
4653 else
4654 count_discard_active++;
4655 hibernate_discard_page(m);
4656 }
4657 m = next;
4658 }
4659
4660 clock_get_uptime(&end);
4661 absolutetime_to_nanoseconds(end - start, &nsec);
4662 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
4663 nsec / 1000000ULL,
4664 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
4665 }
4666
4667 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4668
4669 #include <mach_vm_debug.h>
4670 #if MACH_VM_DEBUG
4671
4672 #include <mach_debug/hash_info.h>
4673 #include <vm/vm_debug.h>
4674
4675 /*
4676 * Routine: vm_page_info
4677 * Purpose:
4678 * Return information about the global VP table.
4679 * Fills the buffer with as much information as possible
4680 * and returns the desired size of the buffer.
4681 * Conditions:
4682 * Nothing locked. The caller should provide
4683 * possibly-pageable memory.
4684 */
4685
4686 unsigned int
4687 vm_page_info(
4688 hash_info_bucket_t *info,
4689 unsigned int count)
4690 {
4691 unsigned int i;
4692 lck_spin_t *bucket_lock;
4693
4694 if (vm_page_bucket_count < count)
4695 count = vm_page_bucket_count;
4696
4697 for (i = 0; i < count; i++) {
4698 vm_page_bucket_t *bucket = &vm_page_buckets[i];
4699 unsigned int bucket_count = 0;
4700 vm_page_t m;
4701
4702 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
4703 lck_spin_lock(bucket_lock);
4704
4705 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
4706 bucket_count++;
4707
4708 lck_spin_unlock(bucket_lock);
4709
4710 /* don't touch pageable memory while holding locks */
4711 info[i].hib_count = bucket_count;
4712 }
4713
4714 return vm_page_bucket_count;
4715 }
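/*
 * Editorial note: a hypothetical MACH_VM_DEBUG-only caller illustrating
 * the size negotiation vm_page_info() supports: it fills at most 'count'
 * buckets and returns the total bucket count, so a short buffer tells
 * the caller how large a buffer a complete snapshot would need.  The
 * function name and the fixed-size buffer are illustrative only.
 */
#if 0	/* illustrative sketch, not compiled */
static void
example_dump_bucket_histogram(void)
{
	hash_info_bucket_t	info[64];
	unsigned int		needed, filled, i;

	needed = vm_page_info(info, 64);
	filled = (needed < 64) ? needed : 64;

	for (i = 0; i < filled; i++)
		printf("bucket %u: %u pages\n", i, info[i].hib_count);

	if (needed > 64)
		printf("(%u buckets total; enlarge the buffer for a full snapshot)\n",
		       needed);
}
#endif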
4716 #endif /* MACH_VM_DEBUG */
4717
4718 #include <mach_kdb.h>
4719 #if MACH_KDB
4720
4721 #include <ddb/db_output.h>
4722 #include <vm/vm_print.h>
4723 #define printf kdbprintf
4724
4725 /*
4726 * Routine: vm_page_print [exported]
4727 */
4728 void
4729 vm_page_print(
4730 db_addr_t db_addr)
4731 {
4732 vm_page_t p;
4733
4734 p = (vm_page_t) (long) db_addr;
4735
4736 iprintf("page 0x%x\n", p);
4737
4738 db_indent += 2;
4739
4740 iprintf("object=0x%x", p->object);
4741 printf(", offset=0x%x", p->offset);
4742 printf(", wire_count=%d", p->wire_count);
4743
4744 iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
4745 (p->local ? "" : "!"),
4746 (p->inactive ? "" : "!"),
4747 (p->active ? "" : "!"),
4748 (p->throttled ? "" : "!"),
4749 (p->gobbled ? "" : "!"),
4750 (p->laundry ? "" : "!"),
4751 (p->free ? "" : "!"),
4752 (p->reference ? "" : "!"),
4753 (p->encrypted ? "" : "!"));
4754 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
4755 (p->busy ? "" : "!"),
4756 (p->wanted ? "" : "!"),
4757 (p->tabled ? "" : "!"),
4758 (p->fictitious ? "" : "!"),
4759 (p->private ? "" : "!"),
4760 (p->precious ? "" : "!"));
4761 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
4762 (p->absent ? "" : "!"),
4763 (p->error ? "" : "!"),
4764 (p->dirty ? "" : "!"),
4765 (p->cleaning ? "" : "!"),
4766 (p->pageout ? "" : "!"),
4767 (p->clustered ? "" : "!"));
4768 iprintf("%soverwriting, %srestart, %sunusual\n",
4769 (p->overwriting ? "" : "!"),
4770 (p->restart ? "" : "!"),
4771 (p->unusual ? "" : "!"));
4772
4773 iprintf("phys_page=0x%x", p->phys_page);
4774
4775 db_indent -= 2;
4776 }
4777 #endif /* MACH_KDB */