1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <ppc/mappings.h> /* (BRINGUP) */
89 #include <pexpert/pexpert.h> /* (BRINGUP) */
90
91 #include <vm/vm_protos.h>
92 #include <vm/memory_object.h>
93 #include <vm/vm_purgeable_internal.h>
94
95 #include <IOKit/IOHibernatePrivate.h>
96
97
98 #if CONFIG_EMBEDDED
99 #include <sys/kern_memorystatus.h>
100 #endif
101
102 #include <sys/kdebug.h>
103
104 boolean_t vm_page_free_verify = TRUE;
105
106 unsigned int vm_lopage_free_count = 0;
107 unsigned int vm_lopage_free_limit = 0;
108 unsigned int vm_lopage_lowater = 0;
109 boolean_t vm_lopage_refill = FALSE;
110 boolean_t vm_lopage_needed = FALSE;
111
112 lck_mtx_ext_t vm_page_queue_lock_ext;
113 lck_mtx_ext_t vm_page_queue_free_lock_ext;
114 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
115
116 int speculative_age_index = 0;
117 int speculative_steal_index = 0;
118 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
119
120
121 __private_extern__ void vm_page_init_lck_grp(void);
122
123 static void vm_page_free_prepare(vm_page_t page);
124
125
126
127 /*
128 * Associated with each page of user-allocatable memory is a
129 * page structure.
130 */
131
132 /*
133 * These variables record the values returned by vm_page_bootstrap,
134 * for debugging purposes. The implementation of pmap_steal_memory
135 * and pmap_startup here also uses them internally.
136 */
137
138 vm_offset_t virtual_space_start;
139 vm_offset_t virtual_space_end;
140 int vm_page_pages;
141
142 /*
143 * The vm_page_lookup() routine, which provides for fast
144 * (virtual memory object, offset) to page lookup, employs
145 * the following hash table. The vm_page_{insert,remove}
146 * routines install and remove associations in the table.
147 * [This table is often called the virtual-to-physical,
148 * or VP, table.]
149 */
150 typedef struct {
151 vm_page_t pages;
152 #if MACH_PAGE_HASH_STATS
153 int cur_count; /* current count */
154 int hi_count; /* high water mark */
155 #endif /* MACH_PAGE_HASH_STATS */
156 } vm_page_bucket_t;
157
158
159 #define BUCKETS_PER_LOCK 16
160
161 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
162 unsigned int vm_page_bucket_count = 0; /* How big is array? */
163 unsigned int vm_page_hash_mask; /* Mask for hash function */
164 unsigned int vm_page_hash_shift; /* Shift for hash function */
165 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
166 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
167
168 lck_spin_t *vm_page_bucket_locks;
169
170
171 #if MACH_PAGE_HASH_STATS
172 /* This routine is only for debug. It is intended to be called by
173 * hand by a developer using a kernel debugger. This routine prints
174 * out vm_page_hash table statistics to the kernel debug console.
175 */
176 void
177 hash_debug(void)
178 {
179 int i;
180 int numbuckets = 0;
181 int highsum = 0;
182 int maxdepth = 0;
183
184 for (i = 0; i < vm_page_bucket_count; i++) {
185 if (vm_page_buckets[i].hi_count) {
186 numbuckets++;
187 highsum += vm_page_buckets[i].hi_count;
188 if (vm_page_buckets[i].hi_count > maxdepth)
189 maxdepth = vm_page_buckets[i].hi_count;
190 }
191 }
192 printf("Total number of buckets: %d\n", vm_page_bucket_count);
193 printf("Number used buckets: %d = %d%%\n",
194 numbuckets, 100*numbuckets/vm_page_bucket_count);
195 printf("Number unused buckets: %d = %d%%\n",
196 vm_page_bucket_count - numbuckets,
197 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
198 printf("Sum of bucket max depth: %d\n", highsum);
199 printf("Average bucket depth: %d.%2d\n",
200 highsum/vm_page_bucket_count,
201 highsum%vm_page_bucket_count);
202 printf("Maximum bucket depth: %d\n", maxdepth);
203 }
204 #endif /* MACH_PAGE_HASH_STATS */
205
206 /*
207 * The virtual page size is currently implemented as a runtime
208 * variable, but is constant once initialized using vm_set_page_size.
209 * This initialization must be done in the machine-dependent
210 * bootstrap sequence, before calling other machine-independent
211 * initializations.
212 *
213 * All references to the virtual page size outside this
214 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
215 * constants.
216 */
217 vm_size_t page_size = PAGE_SIZE;
218 vm_size_t page_mask = PAGE_MASK;
219 int page_shift = PAGE_SHIFT;
220
221 /*
222 * Resident page structures are initialized from
223 * a template (see vm_page_alloc).
224 *
225 * When adding a new field to the resident page
226 * structure, be sure to add initialization
227 * (see vm_page_bootstrap).
228 */
229 struct vm_page vm_page_template;
230
231 vm_page_t vm_pages = VM_PAGE_NULL;
232 unsigned int vm_pages_count = 0;
233 ppnum_t vm_page_lowest = 0;
234
235 /*
236 * Resident pages that represent real memory
237 * are allocated from a set of free lists,
238 * one per color.
239 */
240 unsigned int vm_colors;
241 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
242 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
243 queue_head_t vm_page_queue_free[MAX_COLORS];
244 vm_page_t vm_page_queue_fictitious;
245 unsigned int vm_page_free_wanted;
246 unsigned int vm_page_free_wanted_privileged;
247 unsigned int vm_page_free_count;
248 unsigned int vm_page_fictitious_count;
249
250 unsigned int vm_page_free_count_minimum; /* debugging */
251
252 /*
253 * Occasionally, the virtual memory system uses
254 * resident page structures that do not refer to
255 * real pages, for example to leave a page with
256 * important state information in the VP table.
257 *
258 * These page structures are allocated the way
259 * most other kernel structures are.
260 */
261 zone_t vm_page_zone;
262 vm_locks_array_t vm_page_locks;
263 decl_lck_mtx_data(,vm_page_alloc_lock)
264 unsigned int io_throttle_zero_fill;
265
266 unsigned int vm_page_local_q_count = 0;
267 unsigned int vm_page_local_q_soft_limit = 250;
268 unsigned int vm_page_local_q_hard_limit = 500;
269 struct vplq *vm_page_local_q = NULL;
270
271 /*
272 * Fictitious pages don't have a physical address,
273 * but we must initialize phys_page to something.
274 * For debugging, this should be a strange value
275 * that the pmap module can recognize in assertions.
276 */
277 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
278
279 /*
280 * Guard pages are not accessible so they don't
281 * need a physical address, but we need to enter
282 * one in the pmap.
283 * Let's make it recognizable and make sure that
284 * we don't use a real physical page with that
285 * physical address.
286 */
287 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
288
289 /*
290 * Resident page structures are also chained on
291 * queues that are used by the page replacement
292 * system (pageout daemon). These queues are
293 * defined here, but are shared by the pageout
294 * module. The inactive queue is broken into
295 * inactive and zf for convenience as the
296 * pageout daemon often assigns a higher
297 * affinity to zf pages.
298 */
299 queue_head_t vm_page_queue_active;
300 queue_head_t vm_page_queue_inactive;
301 queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
302 queue_head_t vm_page_queue_throttled;
303
304 unsigned int vm_page_active_count;
305 unsigned int vm_page_inactive_count;
306 unsigned int vm_page_throttled_count;
307 unsigned int vm_page_speculative_count;
308 unsigned int vm_page_wire_count;
309 unsigned int vm_page_wire_count_initial;
310 unsigned int vm_page_gobble_count = 0;
311 unsigned int vm_page_wire_count_warning = 0;
312 unsigned int vm_page_gobble_count_warning = 0;
313
314 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
315 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
316 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
317
318 #if DEVELOPMENT || DEBUG
319 unsigned int vm_page_speculative_recreated = 0;
320 unsigned int vm_page_speculative_created = 0;
321 unsigned int vm_page_speculative_used = 0;
322 #endif
323
324 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
325 ppnum_t max_valid_low_ppnum = 0xffffffff;
326
327
328 /*
329 * Several page replacement parameters are also
330 * shared with this module, so that page allocation
331 * (done here in vm_page_alloc) can trigger the
332 * pageout daemon.
333 */
334 unsigned int vm_page_free_target = 0;
335 unsigned int vm_page_free_min = 0;
336 unsigned int vm_page_throttle_limit = 0;
337 uint32_t vm_page_creation_throttle = 0;
338 unsigned int vm_page_inactive_target = 0;
339 unsigned int vm_page_inactive_min = 0;
340 unsigned int vm_page_free_reserved = 0;
341 unsigned int vm_page_throttle_count = 0;
342
343 /*
344 * The VM system has a couple of heuristics for deciding
345 * that pages are "uninteresting" and should be placed
346 * on the inactive queue as likely candidates for replacement.
347 * These variables let the heuristics be controlled at run-time
348 * to make experimentation easier.
349 */
350
351 boolean_t vm_page_deactivate_hint = TRUE;
352
353 struct vm_page_stats_reusable vm_page_stats_reusable;
354
355 /*
356 * vm_set_page_size:
357 *
358 * Sets the page size, perhaps based upon the memory
359 * size. Must be called before any use of page-size
360 * dependent functions.
361 *
362 * Sets page_shift and page_mask from page_size.
363 */
364 void
365 vm_set_page_size(void)
366 {
367 page_mask = page_size - 1;
368
369 if ((page_mask & page_size) != 0)
370 panic("vm_set_page_size: page size not a power of two");
371
372 for (page_shift = 0; ; page_shift++)
373 if ((1U << page_shift) == page_size)
374 break;
375 }
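/*
 * Illustrative note (not part of the original source): with the common
 * PAGE_SIZE of 4096 bytes, the code above yields page_mask = 0xFFF and
 * page_shift = 12, since 1U << 12 == 4096.  A page size that is not a
 * power of two panics before the shift is ever computed.
 */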
376
377
378 /* Called once during startup, once the cache geometry is known.
379 */
380 static void
381 vm_page_set_colors( void )
382 {
383 unsigned int n, override;
384
385 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
386 n = override;
387 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
388 n = vm_cache_geometry_colors;
389 else n = DEFAULT_COLORS; /* use default if all else fails */
390
391 if ( n == 0 )
392 n = 1;
393 if ( n > MAX_COLORS )
394 n = MAX_COLORS;
395
396 /* the count must be a power of 2 */
397 if ( ( n & (n - 1)) != 0 )
398 panic("vm_page_set_colors");
399
400 vm_colors = n;
401 vm_color_mask = n - 1;
402 }
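/*
 * Illustrative note (not part of the original source): if, say, 8 colors
 * are chosen, vm_color_mask becomes 7 and a page's free-list color is
 * simply (phys_page & vm_color_mask), as used by vm_page_release() later
 * in this file; vm_page_grab() cycles through the free queues with the
 * same mask.  Requiring a power-of-two count is what makes that mask
 * operation valid.
 */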
403
404
405 lck_grp_t vm_page_lck_grp_free;
406 lck_grp_t vm_page_lck_grp_queue;
407 lck_grp_t vm_page_lck_grp_local;
408 lck_grp_t vm_page_lck_grp_purge;
409 lck_grp_t vm_page_lck_grp_alloc;
410 lck_grp_t vm_page_lck_grp_bucket;
411 lck_grp_attr_t vm_page_lck_grp_attr;
412 lck_attr_t vm_page_lck_attr;
413
414
415 __private_extern__ void
416 vm_page_init_lck_grp(void)
417 {
418 /*
419 * initialize the vm_page lock world
420 */
421 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
422 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
423 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
424 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
425 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
426 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
427 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
428 lck_attr_setdefault(&vm_page_lck_attr);
429 }
430
431 void
432 vm_page_init_local_q()
433 {
434 unsigned int num_cpus;
435 unsigned int i;
436 struct vplq *t_local_q;
437
438 num_cpus = ml_get_max_cpus();
439
440 /*
441 * no point in this for a uni-processor system
442 */
443 if (num_cpus >= 2) {
444 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
445
446 for (i = 0; i < num_cpus; i++) {
447 struct vpl *lq;
448
449 lq = &t_local_q[i].vpl_un.vpl;
450 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
451 queue_init(&lq->vpl_queue);
452 lq->vpl_count = 0;
453 }
454 vm_page_local_q_count = num_cpus;
455
456 vm_page_local_q = (struct vplq *)t_local_q;
457 }
458 }
459
460
461 uint64_t initial_max_mem;
462 int initial_wire_count;
463 int initial_free_count;
464 int initial_lopage_count;
465
466 /*
467 * vm_page_bootstrap:
468 *
469 * Initializes the resident memory module.
470 *
471 * Allocates memory for the page cells, and
472 * for the object/offset-to-page hash table headers.
473 * Each page cell is initialized and placed on the free list.
474 * Returns the range of available kernel virtual memory.
475 */
476
477 void
478 vm_page_bootstrap(
479 vm_offset_t *startp,
480 vm_offset_t *endp)
481 {
482 register vm_page_t m;
483 unsigned int i;
484 unsigned int log1;
485 unsigned int log2;
486 unsigned int size;
487
488 /*
489 * Initialize the vm_page template.
490 */
491
492 m = &vm_page_template;
493 bzero(m, sizeof (*m));
494
495 m->pageq.next = NULL;
496 m->pageq.prev = NULL;
497 m->listq.next = NULL;
498 m->listq.prev = NULL;
499 m->next = VM_PAGE_NULL;
500
501 m->object = VM_OBJECT_NULL; /* reset later */
502 m->offset = (vm_object_offset_t) -1; /* reset later */
503
504 m->wire_count = 0;
505 m->local = FALSE;
506 m->inactive = FALSE;
507 m->active = FALSE;
508 m->pageout_queue = FALSE;
509 m->speculative = FALSE;
510 m->laundry = FALSE;
511 m->free = FALSE;
512 m->reference = FALSE;
513 m->gobbled = FALSE;
514 m->private = FALSE;
515 m->throttled = FALSE;
516 m->__unused_pageq_bits = 0;
517
518 m->phys_page = 0; /* reset later */
519
520 m->busy = TRUE;
521 m->wanted = FALSE;
522 m->tabled = FALSE;
523 m->fictitious = FALSE;
524 m->pmapped = FALSE;
525 m->wpmapped = FALSE;
526 m->pageout = FALSE;
527 m->absent = FALSE;
528 m->error = FALSE;
529 m->dirty = FALSE;
530 m->cleaning = FALSE;
531 m->precious = FALSE;
532 m->clustered = FALSE;
533 m->overwriting = FALSE;
534 m->restart = FALSE;
535 m->unusual = FALSE;
536 m->encrypted = FALSE;
537 m->encrypted_cleaning = FALSE;
538 m->list_req_pending = FALSE;
539 m->dump_cleaning = FALSE;
540 m->cs_validated = FALSE;
541 m->cs_tainted = FALSE;
542 m->no_cache = FALSE;
543 m->zero_fill = FALSE;
544 m->reusable = FALSE;
545 m->__unused_object_bits = 0;
546
547
548 /*
549 * Initialize the page queues.
550 */
551 vm_page_init_lck_grp();
552
553 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
554 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
555 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
556
557 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
558 int group;
559
560 purgeable_queues[i].token_q_head = 0;
561 purgeable_queues[i].token_q_tail = 0;
562 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
563 queue_init(&purgeable_queues[i].objq[group]);
564
565 purgeable_queues[i].type = i;
566 purgeable_queues[i].new_pages = 0;
567 #if MACH_ASSERT
568 purgeable_queues[i].debug_count_tokens = 0;
569 purgeable_queues[i].debug_count_objects = 0;
570 #endif
571 };
572
573 for (i = 0; i < MAX_COLORS; i++ )
574 queue_init(&vm_page_queue_free[i]);
575 queue_init(&vm_lopage_queue_free);
576 vm_page_queue_fictitious = VM_PAGE_NULL;
577 queue_init(&vm_page_queue_active);
578 queue_init(&vm_page_queue_inactive);
579 queue_init(&vm_page_queue_throttled);
580 queue_init(&vm_page_queue_zf);
581
582 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
583 queue_init(&vm_page_queue_speculative[i].age_q);
584
585 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
586 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
587 }
588 vm_page_free_wanted = 0;
589 vm_page_free_wanted_privileged = 0;
590
591 vm_page_set_colors();
592
593
594 /*
595 * Steal memory for the map and zone subsystems.
596 */
597
598 vm_map_steal_memory();
599 zone_steal_memory();
600
601 /*
602 * Allocate (and initialize) the virtual-to-physical
603 * table hash buckets.
604 *
605 * The number of buckets should be a power of two to
606 * get a good hash function. The following computation
607 * chooses the first power of two that is greater
608 * than the number of physical pages in the system.
609 */
610
611 if (vm_page_bucket_count == 0) {
612 unsigned int npages = pmap_free_pages();
613
614 vm_page_bucket_count = 1;
615 while (vm_page_bucket_count < npages)
616 vm_page_bucket_count <<= 1;
617 }
618 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
619
620 vm_page_hash_mask = vm_page_bucket_count - 1;
621
622 /*
623 * Calculate object shift value for hashing algorithm:
624 * O = log2(sizeof(struct vm_object))
625 * B = log2(vm_page_bucket_count)
626 * hash shifts the object left by
627 * B/2 - O
628 */
629 size = vm_page_bucket_count;
630 for (log1 = 0; size > 1; log1++)
631 size /= 2;
632 size = sizeof(struct vm_object);
633 for (log2 = 0; size > 1; log2++)
634 size /= 2;
635 vm_page_hash_shift = log1/2 - log2 + 1;
636
637 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
638 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of fourth root of table size) */
639 vm_page_bucket_hash |= 1; /* Set the low bit - it must always be 1 to ensure a unique series */
640
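/*
 * Worked example (not part of the original source, sizes hypothetical):
 * if vm_page_bucket_count is 262144 (2^18) then log1 = 18, and if
 * sizeof(struct vm_object) were around 200 bytes then log2 = 7, giving
 * vm_page_hash_shift = 18/2 - 7 + 1 = 3 and
 * vm_page_bucket_hash = (1 << 9) | (1 << 4) | 1 = 0x211.
 */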
641 if (vm_page_hash_mask & vm_page_bucket_count)
642 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
643
644 vm_page_buckets = (vm_page_bucket_t *)
645 pmap_steal_memory(vm_page_bucket_count *
646 sizeof(vm_page_bucket_t));
647
648 vm_page_bucket_locks = (lck_spin_t *)
649 pmap_steal_memory(vm_page_bucket_lock_count *
650 sizeof(lck_spin_t));
651
652 for (i = 0; i < vm_page_bucket_count; i++) {
653 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
654
655 bucket->pages = VM_PAGE_NULL;
656 #if MACH_PAGE_HASH_STATS
657 bucket->cur_count = 0;
658 bucket->hi_count = 0;
659 #endif /* MACH_PAGE_HASH_STATS */
660 }
661
662 for (i = 0; i < vm_page_bucket_lock_count; i++)
663 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
664
665 /*
666 * Machine-dependent code allocates the resident page table.
667 * It uses vm_page_init to initialize the page frames.
668 * The code also returns to us the virtual space available
669 * to the kernel. We don't trust the pmap module
670 * to get the alignment right.
671 */
672
673 pmap_startup(&virtual_space_start, &virtual_space_end);
674 virtual_space_start = round_page(virtual_space_start);
675 virtual_space_end = trunc_page(virtual_space_end);
676
677 *startp = virtual_space_start;
678 *endp = virtual_space_end;
679
680 /*
681 * Compute the initial "wire" count.
682 * Up until now, the pages which have been set aside are not under
683 * the VM system's control, so although they aren't explicitly
684 * wired, they nonetheless can't be moved. At this moment,
685 * all VM managed pages are "free", courtesy of pmap_startup.
686 */
687 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
688 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
689 vm_page_wire_count_initial = vm_page_wire_count;
690 vm_page_free_count_minimum = vm_page_free_count;
691
692 initial_max_mem = max_mem;
693 initial_wire_count = vm_page_wire_count;
694 initial_free_count = vm_page_free_count;
695 initial_lopage_count = vm_lopage_free_count;
696
697 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
698 vm_page_free_count, vm_page_wire_count);
699
700 simple_lock_init(&vm_paging_lock, 0);
701 }
702
703 #ifndef MACHINE_PAGES
704 /*
705 * We implement pmap_steal_memory and pmap_startup with the help
706 * of two simpler functions, pmap_virtual_space and pmap_next_page.
707 */
708
709 void *
710 pmap_steal_memory(
711 vm_size_t size)
712 {
713 vm_offset_t addr, vaddr;
714 ppnum_t phys_page;
715
716 /*
717 * We round the size up to a multiple of the pointer size.
718 */
719
720 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
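/*
 * Illustrative note (not part of the original source): on a 64-bit
 * build sizeof (void *) is 8, so a request for, say, 13 bytes becomes
 * (13 + 7) & ~7 == 16, keeping every stolen chunk pointer-aligned.
 */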
721
722 /*
723 * If this is the first call to pmap_steal_memory,
724 * we have to initialize ourself.
725 */
726
727 if (virtual_space_start == virtual_space_end) {
728 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
729
730 /*
731 * The initial values must be aligned properly, and
732 * we don't trust the pmap module to do it right.
733 */
734
735 virtual_space_start = round_page(virtual_space_start);
736 virtual_space_end = trunc_page(virtual_space_end);
737 }
738
739 /*
740 * Allocate virtual memory for this request.
741 */
742
743 addr = virtual_space_start;
744 virtual_space_start += size;
745
746 kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
747
748 /*
749 * Allocate and map physical pages to back new virtual pages.
750 */
751
752 for (vaddr = round_page(addr);
753 vaddr < addr + size;
754 vaddr += PAGE_SIZE) {
755
756 if (!pmap_next_page_hi(&phys_page))
757 panic("pmap_steal_memory");
758
759 /*
760 * XXX Logically, these mappings should be wired,
761 * but some pmap modules barf if they are.
762 */
763 #if defined(__LP64__)
764 pmap_pre_expand(kernel_pmap, vaddr);
765 #endif
766
767 pmap_enter(kernel_pmap, vaddr, phys_page,
768 VM_PROT_READ|VM_PROT_WRITE,
769 VM_WIMG_USE_DEFAULT, FALSE);
770 /*
771 * Account for newly stolen memory
772 */
773 vm_page_wire_count++;
774
775 }
776
777 return (void *) addr;
778 }
779
780 void
781 pmap_startup(
782 vm_offset_t *startp,
783 vm_offset_t *endp)
784 {
785 unsigned int i, npages, pages_initialized, fill, fillval;
786 ppnum_t phys_page;
787 addr64_t tmpaddr;
788
789 /*
790 * We calculate how many page frames we will have
791 * and then allocate the page structures in one chunk.
792 */
793
794 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
795 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
796 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many pages we can manage, leaving room for their vm_page_ts as well */
797
798 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
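/*
 * Worked example (not part of the original source, struct size hypothetical):
 * each managed frame costs PAGE_SIZE bytes of usable memory plus one
 * struct vm_page to describe it.  With 4 KB pages and an 80-byte
 * struct vm_page, 1 GB of remaining memory yields roughly
 * 1073741824 / (4096 + 80) ~= 257122 entries, a little less than the
 * 262144 raw frames that 1 GB contains.
 */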
799
800 /*
801 * Initialize the page frames.
802 */
803 for (i = 0, pages_initialized = 0; i < npages; i++) {
804 if (!pmap_next_page(&phys_page))
805 break;
806 if (pages_initialized == 0 || phys_page < vm_page_lowest)
807 vm_page_lowest = phys_page;
808
809 vm_page_init(&vm_pages[i], phys_page, FALSE);
810 vm_page_pages++;
811 pages_initialized++;
812 }
813 vm_pages_count = pages_initialized;
814
815 /*
816 * Check if we want to initialize pages to a known value
817 */
818 fill = 0; /* Assume no fill */
819 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
820
821 // -debug code remove
822 if (2 == vm_himemory_mode) {
823 // free low -> high so high is preferred
824 for (i = 1; i <= pages_initialized; i++) {
825 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
826 vm_page_release(&vm_pages[i - 1]);
827 }
828 }
829 else
830 // debug code remove-
831
832 /*
833 * Release pages in reverse order so that physical pages
834 * initially get allocated in ascending addresses. This keeps
835 * the devices (which must address physical memory) happy if
836 * they require several consecutive pages.
837 */
838 for (i = pages_initialized; i > 0; i--) {
839 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
840 vm_page_release(&vm_pages[i - 1]);
841 }
842
843 #if 0
844 {
845 vm_page_t xx, xxo, xxl;
846 int i, j, k, l;
847
848 j = 0; /* (BRINGUP) */
849 xxl = 0;
850
851 for( i = 0; i < vm_colors; i++ ) {
852 queue_iterate(&vm_page_queue_free[i],
853 xx,
854 vm_page_t,
855 pageq) { /* BRINGUP */
856 j++; /* (BRINGUP) */
857 if(j > vm_page_free_count) { /* (BRINGUP) */
858 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
859 }
860
861 l = vm_page_free_count - j; /* (BRINGUP) */
862 k = 0; /* (BRINGUP) */
863
864 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
865
866 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
867 k++;
868 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
869 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
870 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
871 }
872 }
873
874 xxl = xx;
875 }
876 }
877
878 if(j != vm_page_free_count) { /* (BRINGUP) */
879 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
880 }
881 }
882 #endif
883
884
885 /*
886 * We have to re-align virtual_space_start,
887 * because pmap_steal_memory has been using it.
888 */
889
890 virtual_space_start = round_page(virtual_space_start);
891
892 *startp = virtual_space_start;
893 *endp = virtual_space_end;
894 }
895 #endif /* MACHINE_PAGES */
896
897 /*
898 * Routine: vm_page_module_init
899 * Purpose:
900 * Second initialization pass, to be done after
901 * the basic VM system is ready.
902 */
903 void
904 vm_page_module_init(void)
905 {
906 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
907 0, PAGE_SIZE, "vm pages");
908
909 #if ZONE_DEBUG
910 zone_debug_disable(vm_page_zone);
911 #endif /* ZONE_DEBUG */
912
913 zone_change(vm_page_zone, Z_EXPAND, FALSE);
914 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
915 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
916
917 /*
918 * Adjust zone statistics to account for the real pages allocated
919 * in vm_page_create(). [Q: is this really what we want?]
920 */
921 vm_page_zone->count += vm_page_pages;
922 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
923
924 lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
925 }
926
927 /*
928 * Routine: vm_page_create
929 * Purpose:
930 * After the VM system is up, machine-dependent code
931 * may stumble across more physical memory. For example,
932 * memory that it was reserving for a frame buffer.
933 * vm_page_create turns this memory into available pages.
934 */
935
936 void
937 vm_page_create(
938 ppnum_t start,
939 ppnum_t end)
940 {
941 ppnum_t phys_page;
942 vm_page_t m;
943
944 for (phys_page = start;
945 phys_page < end;
946 phys_page++) {
947 while ((m = (vm_page_t) vm_page_grab_fictitious())
948 == VM_PAGE_NULL)
949 vm_page_more_fictitious();
950
951 vm_page_init(m, phys_page, FALSE);
952 pmap_clear_noencrypt(phys_page);
953 vm_page_pages++;
954 vm_page_release(m);
955 }
956 }
957
958 /*
959 * vm_page_hash:
960 *
961 * Distributes the object/offset key pair among hash buckets.
962 *
963 * NOTE: The bucket count must be a power of 2
964 */
965 #define vm_page_hash(object, offset) (\
966 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
967 & vm_page_hash_mask)
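/*
 * Descriptive note (not part of the original source): vm_page_insert_internal(),
 * vm_page_replace(), vm_page_remove() and vm_page_lookup() below all compute
 * hash_id with this macro and then take
 * vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK], so a single spin lock
 * covers BUCKETS_PER_LOCK (16) consecutive buckets.
 */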
968
969
970 /*
971 * vm_page_insert: [ internal use only ]
972 *
973 * Inserts the given mem entry into the object/object-page
974 * table and object list.
975 *
976 * The object must be locked.
977 */
978 void
979 vm_page_insert(
980 vm_page_t mem,
981 vm_object_t object,
982 vm_object_offset_t offset)
983 {
984 vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
985 }
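/*
 * Illustrative usage sketch (not part of the original source; a typical
 * caller, error handling omitted):
 *
 *	mem = vm_page_grab();			/* may return VM_PAGE_NULL */
 *	vm_object_lock(object);			/* exclusive lock required */
 *	vm_page_insert(mem, object, offset);	/* enter into hash + memq */
 *	vm_object_unlock(object);
 *
 * vm_page_insert_internal() below asserts the exclusive object lock and
 * that no page is already resident at the given offset.
 */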
986
987 void
988 vm_page_insert_internal(
989 vm_page_t mem,
990 vm_object_t object,
991 vm_object_offset_t offset,
992 boolean_t queues_lock_held,
993 boolean_t insert_in_hash)
994 {
995 vm_page_bucket_t *bucket;
996 lck_spin_t *bucket_lock;
997 int hash_id;
998
999 XPR(XPR_VM_PAGE,
1000 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1001 object, offset, mem, 0,0);
1002
1003 VM_PAGE_CHECK(mem);
1004
1005 if (object == vm_submap_object) {
1006 /* the vm_submap_object is only a placeholder for submaps */
1007 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1008 }
1009
1010 vm_object_lock_assert_exclusive(object);
1011 #if DEBUG
1012 lck_mtx_assert(&vm_page_queue_lock,
1013 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1014 : LCK_MTX_ASSERT_NOTOWNED);
1015 #endif /* DEBUG */
1016
1017 if (insert_in_hash == TRUE) {
1018 #if DEBUG
1019 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1020 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1021 "already in (obj=%p,off=0x%llx)",
1022 mem, object, offset, mem->object, mem->offset);
1023 #endif
1024 assert(!object->internal || offset < object->size);
1025
1026 /* only insert "pageout" pages into "pageout" objects,
1027 * and normal pages into normal objects */
1028 assert(object->pageout == mem->pageout);
1029
1030 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1031
1032 /*
1033 * Record the object/offset pair in this page
1034 */
1035
1036 mem->object = object;
1037 mem->offset = offset;
1038
1039 /*
1040 * Insert it into the object/offset hash table
1041 */
1042 hash_id = vm_page_hash(object, offset);
1043 bucket = &vm_page_buckets[hash_id];
1044 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1045
1046 lck_spin_lock(bucket_lock);
1047
1048 mem->next = bucket->pages;
1049 bucket->pages = mem;
1050 #if MACH_PAGE_HASH_STATS
1051 if (++bucket->cur_count > bucket->hi_count)
1052 bucket->hi_count = bucket->cur_count;
1053 #endif /* MACH_PAGE_HASH_STATS */
1054
1055 lck_spin_unlock(bucket_lock);
1056 }
1057 /*
1058 * Now link into the object's list of backed pages.
1059 */
1060
1061 VM_PAGE_INSERT(mem, object);
1062 mem->tabled = TRUE;
1063
1064 /*
1065 * Show that the object has one more resident page.
1066 */
1067
1068 object->resident_page_count++;
1069 if (VM_PAGE_WIRED(mem)) {
1070 object->wired_page_count++;
1071 }
1072 assert(object->resident_page_count >= object->wired_page_count);
1073
1074 assert(!mem->reusable);
1075
1076 if (object->purgable == VM_PURGABLE_VOLATILE) {
1077 if (VM_PAGE_WIRED(mem)) {
1078 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1079 } else {
1080 OSAddAtomic(1, &vm_page_purgeable_count);
1081 }
1082 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1083 mem->throttled) {
1084 /*
1085 * This page belongs to a purged VM object but hasn't
1086 * been purged (because it was "busy").
1087 * It's in the "throttled" queue and hence not
1088 * visible to vm_pageout_scan(). Move it to a pageable
1089 * queue, so that it can eventually be reclaimed, instead
1090 * of lingering in the "empty" object.
1091 */
1092 if (queues_lock_held == FALSE)
1093 vm_page_lockspin_queues();
1094 vm_page_deactivate(mem);
1095 if (queues_lock_held == FALSE)
1096 vm_page_unlock_queues();
1097 }
1098 }
1099
1100 /*
1101 * vm_page_replace:
1102 *
1103 * Exactly like vm_page_insert, except that we first
1104 * remove any existing page at the given offset in object.
1105 *
1106 * The object must be locked.
1107 */
1108 void
1109 vm_page_replace(
1110 register vm_page_t mem,
1111 register vm_object_t object,
1112 register vm_object_offset_t offset)
1113 {
1114 vm_page_bucket_t *bucket;
1115 vm_page_t found_m = VM_PAGE_NULL;
1116 lck_spin_t *bucket_lock;
1117 int hash_id;
1118
1119 VM_PAGE_CHECK(mem);
1120 vm_object_lock_assert_exclusive(object);
1121 #if DEBUG
1122 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1123 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1124 "already in (obj=%p,off=0x%llx)",
1125 mem, object, offset, mem->object, mem->offset);
1126 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1127 #endif
1128 /*
1129 * Record the object/offset pair in this page
1130 */
1131
1132 mem->object = object;
1133 mem->offset = offset;
1134
1135 /*
1136 * Insert it into the object/offset hash table,
1137 * replacing any page that might have been there.
1138 */
1139
1140 hash_id = vm_page_hash(object, offset);
1141 bucket = &vm_page_buckets[hash_id];
1142 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1143
1144 lck_spin_lock(bucket_lock);
1145
1146 if (bucket->pages) {
1147 vm_page_t *mp = &bucket->pages;
1148 vm_page_t m = *mp;
1149
1150 do {
1151 if (m->object == object && m->offset == offset) {
1152 /*
1153 * Remove old page from hash list
1154 */
1155 *mp = m->next;
1156
1157 found_m = m;
1158 break;
1159 }
1160 mp = &m->next;
1161 } while ((m = *mp));
1162
1163 mem->next = bucket->pages;
1164 } else {
1165 mem->next = VM_PAGE_NULL;
1166 }
1167 /*
1168 * insert new page at head of hash list
1169 */
1170 bucket->pages = mem;
1171
1172 lck_spin_unlock(bucket_lock);
1173
1174 if (found_m) {
1175 /*
1176 * there was already a page at the specified
1177 * offset for this object... remove it from
1178 * the object and free it back to the free list
1179 */
1180 vm_page_free_unlocked(found_m, FALSE);
1181 }
1182 vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
1183 }
1184
1185 /*
1186 * vm_page_remove: [ internal use only ]
1187 *
1188 * Removes the given mem entry from the object/offset-page
1189 * table and the object page list.
1190 *
1191 * The object must be locked.
1192 */
1193
1194 void
1195 vm_page_remove(
1196 vm_page_t mem,
1197 boolean_t remove_from_hash)
1198 {
1199 vm_page_bucket_t *bucket;
1200 vm_page_t this;
1201 lck_spin_t *bucket_lock;
1202 int hash_id;
1203
1204 XPR(XPR_VM_PAGE,
1205 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1206 mem->object, mem->offset,
1207 mem, 0,0);
1208
1209 vm_object_lock_assert_exclusive(mem->object);
1210 assert(mem->tabled);
1211 assert(!mem->cleaning);
1212 VM_PAGE_CHECK(mem);
1213
1214 if (remove_from_hash == TRUE) {
1215 /*
1216 * Remove from the object/offset hash table
1217 */
1218 hash_id = vm_page_hash(mem->object, mem->offset);
1219 bucket = &vm_page_buckets[hash_id];
1220 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1221
1222 lck_spin_lock(bucket_lock);
1223
1224 if ((this = bucket->pages) == mem) {
1225 /* optimize for common case */
1226
1227 bucket->pages = mem->next;
1228 } else {
1229 vm_page_t *prev;
1230
1231 for (prev = &this->next;
1232 (this = *prev) != mem;
1233 prev = &this->next)
1234 continue;
1235 *prev = this->next;
1236 }
1237 #if MACH_PAGE_HASH_STATS
1238 bucket->cur_count--;
1239 #endif /* MACH_PAGE_HASH_STATS */
1240
1241 lck_spin_unlock(bucket_lock);
1242 }
1243 /*
1244 * Now remove from the object's list of backed pages.
1245 */
1246
1247 VM_PAGE_REMOVE(mem);
1248
1249 /*
1250 * And show that the object has one fewer resident
1251 * page.
1252 */
1253
1254 assert(mem->object->resident_page_count > 0);
1255 mem->object->resident_page_count--;
1256 if (VM_PAGE_WIRED(mem)) {
1257 assert(mem->object->wired_page_count > 0);
1258 mem->object->wired_page_count--;
1259 }
1260 assert(mem->object->resident_page_count >=
1261 mem->object->wired_page_count);
1262 if (mem->reusable) {
1263 assert(mem->object->reusable_page_count > 0);
1264 mem->object->reusable_page_count--;
1265 assert(mem->object->reusable_page_count <=
1266 mem->object->resident_page_count);
1267 mem->reusable = FALSE;
1268 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1269 vm_page_stats_reusable.reused_remove++;
1270 } else if (mem->object->all_reusable) {
1271 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1272 vm_page_stats_reusable.reused_remove++;
1273 }
1274
1275 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1276 if (VM_PAGE_WIRED(mem)) {
1277 assert(vm_page_purgeable_wired_count > 0);
1278 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1279 } else {
1280 assert(vm_page_purgeable_count > 0);
1281 OSAddAtomic(-1, &vm_page_purgeable_count);
1282 }
1283 }
1284 mem->tabled = FALSE;
1285 mem->object = VM_OBJECT_NULL;
1286 mem->offset = (vm_object_offset_t) -1;
1287 }
1288
1289
1290 /*
1291 * vm_page_lookup:
1292 *
1293 * Returns the page associated with the object/offset
1294 * pair specified; if none is found, VM_PAGE_NULL is returned.
1295 *
1296 * The object must be locked. No side effects.
1297 */
1298
1299 unsigned long vm_page_lookup_hint = 0;
1300 unsigned long vm_page_lookup_hint_next = 0;
1301 unsigned long vm_page_lookup_hint_prev = 0;
1302 unsigned long vm_page_lookup_hint_miss = 0;
1303 unsigned long vm_page_lookup_bucket_NULL = 0;
1304 unsigned long vm_page_lookup_miss = 0;
1305
1306
1307 vm_page_t
1308 vm_page_lookup(
1309 vm_object_t object,
1310 vm_object_offset_t offset)
1311 {
1312 vm_page_t mem;
1313 vm_page_bucket_t *bucket;
1314 queue_entry_t qe;
1315 lck_spin_t *bucket_lock;
1316 int hash_id;
1317
1318 vm_object_lock_assert_held(object);
1319 mem = object->memq_hint;
1320
1321 if (mem != VM_PAGE_NULL) {
1322 assert(mem->object == object);
1323
1324 if (mem->offset == offset) {
1325 vm_page_lookup_hint++;
1326 return mem;
1327 }
1328 qe = queue_next(&mem->listq);
1329
1330 if (! queue_end(&object->memq, qe)) {
1331 vm_page_t next_page;
1332
1333 next_page = (vm_page_t) qe;
1334 assert(next_page->object == object);
1335
1336 if (next_page->offset == offset) {
1337 vm_page_lookup_hint_next++;
1338 object->memq_hint = next_page; /* new hint */
1339 return next_page;
1340 }
1341 }
1342 qe = queue_prev(&mem->listq);
1343
1344 if (! queue_end(&object->memq, qe)) {
1345 vm_page_t prev_page;
1346
1347 prev_page = (vm_page_t) qe;
1348 assert(prev_page->object == object);
1349
1350 if (prev_page->offset == offset) {
1351 vm_page_lookup_hint_prev++;
1352 object->memq_hint = prev_page; /* new hint */
1353 return prev_page;
1354 }
1355 }
1356 }
1357 /*
1358 * Search the hash table for this object/offset pair
1359 */
1360 hash_id = vm_page_hash(object, offset);
1361 bucket = &vm_page_buckets[hash_id];
1362
1363 /*
1364 * since we hold the object lock, we are guaranteed that no
1365 * new pages can be inserted into this object... this in turn
1366 * guarantees that the page we're looking for can't exist
1367 * if the bucket it hashes to is currently NULL even when looked
1368 * at outside the scope of the hash bucket lock... this is a
1369 * really cheap optimization to avoid taking the lock
1370 */
1371 if (bucket->pages == VM_PAGE_NULL) {
1372 vm_page_lookup_bucket_NULL++;
1373
1374 return (VM_PAGE_NULL);
1375 }
1376 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1377
1378 lck_spin_lock(bucket_lock);
1379
1380 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1381 VM_PAGE_CHECK(mem);
1382 if ((mem->object == object) && (mem->offset == offset))
1383 break;
1384 }
1385 lck_spin_unlock(bucket_lock);
1386
1387 if (mem != VM_PAGE_NULL) {
1388 if (object->memq_hint != VM_PAGE_NULL) {
1389 vm_page_lookup_hint_miss++;
1390 }
1391 assert(mem->object == object);
1392 object->memq_hint = mem;
1393 } else
1394 vm_page_lookup_miss++;
1395
1396 return(mem);
1397 }
1398
1399
1400 /*
1401 * vm_page_rename:
1402 *
1403 * Move the given memory entry from its
1404 * current object to the specified target object/offset.
1405 *
1406 * The object must be locked.
1407 */
1408 void
1409 vm_page_rename(
1410 register vm_page_t mem,
1411 register vm_object_t new_object,
1412 vm_object_offset_t new_offset,
1413 boolean_t encrypted_ok)
1414 {
1415 assert(mem->object != new_object);
1416
1417 /*
1418 * ENCRYPTED SWAP:
1419 * The encryption key is based on the page's memory object
1420 * (aka "pager") and paging offset. Moving the page to
1421 * another VM object changes its "pager" and "paging_offset"
1422 * so it has to be decrypted first, or we would lose the key.
1423 *
1424 * One exception is VM object collapsing, where we transfer pages
1425 * from one backing object to its parent object. This operation also
1426 * transfers the paging information, so the <pager,paging_offset> info
1427 * should remain consistent. The caller (vm_object_do_collapse())
1428 * sets "encrypted_ok" in this case.
1429 */
1430 if (!encrypted_ok && mem->encrypted) {
1431 panic("vm_page_rename: page %p is encrypted\n", mem);
1432 }
1433
1434 XPR(XPR_VM_PAGE,
1435 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1436 new_object, new_offset,
1437 mem, 0,0);
1438
1439 /*
1440 * Changes to mem->object require the page lock because
1441 * the pageout daemon uses that lock to get the object.
1442 */
1443 vm_page_lockspin_queues();
1444
1445 vm_page_remove(mem, TRUE);
1446 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);
1447
1448 vm_page_unlock_queues();
1449 }
1450
1451 /*
1452 * vm_page_init:
1453 *
1454 * Initialize the fields in a new page.
1455 * This takes a structure with random values and initializes it
1456 * so that it can be given to vm_page_release or vm_page_insert.
1457 */
1458 void
1459 vm_page_init(
1460 vm_page_t mem,
1461 ppnum_t phys_page,
1462 boolean_t lopage)
1463 {
1464 assert(phys_page);
1465
1466 *mem = vm_page_template;
1467 mem->phys_page = phys_page;
1468 mem->lopage = lopage;
1469 }
1470
1471 /*
1472 * vm_page_grab_fictitious:
1473 *
1474 * Remove a fictitious page from the free list.
1475 * Returns VM_PAGE_NULL if there are no free pages.
1476 */
1477 int c_vm_page_grab_fictitious = 0;
1478 int c_vm_page_release_fictitious = 0;
1479 int c_vm_page_more_fictitious = 0;
1480
1481 extern vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
1482
1483 vm_page_t
1484 vm_page_grab_fictitious_common(
1485 ppnum_t phys_addr)
1486 {
1487 register vm_page_t m;
1488
1489 m = (vm_page_t)zget(vm_page_zone);
1490 if (m) {
1491 vm_page_init(m, phys_addr, FALSE);
1492 m->fictitious = TRUE;
1493 }
1494
1495 c_vm_page_grab_fictitious++;
1496 return m;
1497 }
1498
1499 vm_page_t
1500 vm_page_grab_fictitious(void)
1501 {
1502 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1503 }
1504
1505 vm_page_t
1506 vm_page_grab_guard(void)
1507 {
1508 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1509 }
1510
1511 /*
1512 * vm_page_release_fictitious:
1513 *
1514 * Release a fictitious page to the free list.
1515 */
1516
1517 void
1518 vm_page_release_fictitious(
1519 register vm_page_t m)
1520 {
1521 assert(!m->free);
1522 assert(m->busy);
1523 assert(m->fictitious);
1524 assert(m->phys_page == vm_page_fictitious_addr ||
1525 m->phys_page == vm_page_guard_addr);
1526
1527 c_vm_page_release_fictitious++;
1528 #if DEBUG
1529 if (m->free)
1530 panic("vm_page_release_fictitious");
1531 #endif
1532 m->free = TRUE;
1533 zfree(vm_page_zone, m);
1534 }
1535
1536 /*
1537 * vm_page_more_fictitious:
1538 *
1539 * Add more fictitious pages to the free list.
1540 * Allowed to block. This routine is way intimate
1541 * with the zones code, for several reasons:
1542 * 1. we need to carve some page structures out of physical
1543 * memory before zones work, so they _cannot_ come from
1544 * the zone_map.
1545 * 2. the zone needs to be collectable in order to prevent
1546 * growth without bound. These structures are used by
1547 * the device pager (by the hundreds and thousands), as
1548 * private pages for pageout, and as blocking pages for
1549 * pagein. Temporary bursts in demand should not result in
1550 * permanent allocation of a resource.
1551 * 3. To smooth allocation humps, we allocate single pages
1552 * with kernel_memory_allocate(), and cram them into the
1553 * zone. This also allows us to initialize the vm_page_t's
1554 * on the way into the zone, so that zget() always returns
1555 * an initialized structure. The zone free element pointer
1556 * and the free page pointer are both the first item in the
1557 * vm_page_t.
1558 * 4. By having the pages in the zone pre-initialized, we need
1559 * not keep 2 levels of lists. The garbage collector simply
1560 * scans our list, and reduces physical memory usage as it
1561 * sees fit.
1562 */
1563
1564 void vm_page_more_fictitious(void)
1565 {
1566 register vm_page_t m;
1567 vm_offset_t addr;
1568 kern_return_t retval;
1569 int i;
1570
1571 c_vm_page_more_fictitious++;
1572
1573 /*
1574 * Allocate a single page from the zone_map. Do not wait if no physical
1575 * pages are immediately available, and do not zero the space. We need
1576 * our own blocking lock here to prevent having multiple,
1577 * simultaneous requests from piling up on the zone_map lock. Exactly
1578 * one (of our) threads should be potentially waiting on the map lock.
1579 * If winner is not vm-privileged, then the page allocation will fail,
1580 * and it will temporarily block here in the vm_page_wait().
1581 */
1582 lck_mtx_lock(&vm_page_alloc_lock);
1583 /*
1584 * If another thread allocated space, just bail out now.
1585 */
1586 if (zone_free_count(vm_page_zone) > 5) {
1587 /*
1588 * The number "5" is a small number that is larger than the
1589 * number of fictitious pages that any single caller will
1590 * attempt to allocate. Otherwise, a thread will attempt to
1591 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1592 * release all of the resources and locks already acquired,
1593 * and then call this routine. This routine finds the pages
1594 * that the caller released, so fails to allocate new space.
1595 * The process repeats infinitely. The largest known number
1596 * of fictitious pages required in this manner is 2. 5 is
1597 * simply a somewhat larger number.
1598 */
1599 lck_mtx_unlock(&vm_page_alloc_lock);
1600 return;
1601 }
1602
1603 retval = kernel_memory_allocate(zone_map,
1604 &addr, PAGE_SIZE, VM_PROT_ALL,
1605 KMA_KOBJECT|KMA_NOPAGEWAIT);
1606 if (retval != KERN_SUCCESS) {
1607 /*
1608 * No page was available. Tell the pageout daemon, drop the
1609 * lock to give another thread a chance at it, and
1610 * wait for the pageout daemon to make progress.
1611 */
1612 lck_mtx_unlock(&vm_page_alloc_lock);
1613 vm_page_wait(THREAD_UNINT);
1614 return;
1615 }
1616 /*
1617 * Initialize as many vm_page_t's as will fit on this page. This
1618 * depends on the zone code disturbing ONLY the first item of
1619 * each zone element.
1620 */
1621 m = (vm_page_t)addr;
1622 for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
1623 vm_page_init(m, vm_page_fictitious_addr, FALSE);
1624 m->fictitious = TRUE;
1625 m++;
1626 }
1627 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1628 lck_mtx_unlock(&vm_page_alloc_lock);
1629 }
1630
1631
1632 /*
1633 * vm_pool_low():
1634 *
1635 * Return true if it is not likely that a non-vm_privileged thread
1636 * can get memory without blocking. Advisory only, since the
1637 * situation may change under us.
1638 */
1639 int
1640 vm_pool_low(void)
1641 {
1642 /* No locking, at worst we will fib. */
1643 return( vm_page_free_count <= vm_page_free_reserved );
1644 }
1645
1646
1647
1648 /*
1649 * this is an interface to support bring-up of drivers
1650 * on platforms with physical memory > 4G...
1651 */
1652 int vm_himemory_mode = 0;
1653
1654
1655 /*
1656 * this interface exists to support hardware controllers
1657 * incapable of generating DMAs with more than 32 bits
1658 * of address on platforms with physical memory > 4G...
1659 */
1660 unsigned int vm_lopages_allocated_q = 0;
1661 unsigned int vm_lopages_allocated_cpm_success = 0;
1662 unsigned int vm_lopages_allocated_cpm_failed = 0;
1663 queue_head_t vm_lopage_queue_free;
1664
1665 vm_page_t
1666 vm_page_grablo(void)
1667 {
1668 vm_page_t mem;
1669
1670 if (vm_lopage_needed == FALSE)
1671 return (vm_page_grab());
1672
1673 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1674
1675 if ( !queue_empty(&vm_lopage_queue_free)) {
1676 queue_remove_first(&vm_lopage_queue_free,
1677 mem,
1678 vm_page_t,
1679 pageq);
1680 assert(vm_lopage_free_count);
1681
1682 vm_lopage_free_count--;
1683 vm_lopages_allocated_q++;
1684
1685 if (vm_lopage_free_count < vm_lopage_lowater)
1686 vm_lopage_refill = TRUE;
1687
1688 lck_mtx_unlock(&vm_page_queue_free_lock);
1689 } else {
1690 lck_mtx_unlock(&vm_page_queue_free_lock);
1691
1692 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1693
1694 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1695 vm_lopages_allocated_cpm_failed++;
1696 lck_mtx_unlock(&vm_page_queue_free_lock);
1697
1698 return (VM_PAGE_NULL);
1699 }
1700 mem->busy = TRUE;
1701
1702 vm_page_lockspin_queues();
1703
1704 mem->gobbled = FALSE;
1705 vm_page_gobble_count--;
1706 vm_page_wire_count--;
1707
1708 vm_lopages_allocated_cpm_success++;
1709 vm_page_unlock_queues();
1710 }
1711 assert(!mem->gobbled);
1712 assert(mem->busy);
1713 assert(!mem->free);
1714 assert(!mem->pmapped);
1715 assert(!mem->wpmapped);
1716
1717 mem->pageq.next = NULL;
1718 mem->pageq.prev = NULL;
1719
1720 return (mem);
1721 }
1722
1723 /*
1724 * vm_page_grab:
1725 *
1726 * first try to grab a page from the per-cpu free list...
1727 * this must be done while pre-emption is disabled... if
1728 * a page is available, we're done...
1729 * if no page is available, grab the vm_page_queue_free_lock
1730 * and see if current number of free pages would allow us
1731 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1732 * if there are pages available, disable preemption and
1733 * recheck the state of the per-cpu free list... we could
1734 * have been preempted and moved to a different cpu, or
1735 * some other thread could have re-filled it... if still
1736 * empty, figure out how many pages we can steal from the
1737 * global free queue and move to the per-cpu queue...
1738 * return 1 of these pages when done... only wakeup the
1739 * pageout_scan thread if we moved pages from the global
1740 * list... no need for the wakeup if we've satisfied the
1741 * request from the per-cpu queue.
1742 */
1743
1744 #define COLOR_GROUPS_TO_STEAL 4
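/*
 * Illustrative note (not part of the original source): with, say, 8 colors
 * the refill path below normally steals 4 * 8 = 32 pages from the global
 * free queues for the per-cpu list, clipped so the global pool never dips
 * below vm_page_free_reserved; at or below the reserve only one page is
 * taken per call.
 */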
1745
1746
1747 vm_page_t
1748 vm_page_grab( void )
1749 {
1750 vm_page_t mem;
1751
1752
1753 disable_preemption();
1754
1755 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1756 return_page_from_cpu_list:
1757 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1758 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1759 mem->pageq.next = NULL;
1760
1761 enable_preemption();
1762
1763 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1764 assert(mem->tabled == FALSE);
1765 assert(mem->object == VM_OBJECT_NULL);
1766 assert(!mem->laundry);
1767 assert(!mem->free);
1768 assert(pmap_verify_free(mem->phys_page));
1769 assert(mem->busy);
1770 assert(!mem->encrypted);
1771 assert(!mem->pmapped);
1772 assert(!mem->wpmapped);
1773
1774 return mem;
1775 }
1776 enable_preemption();
1777
1778
1779 /*
1780 * Optionally produce warnings if the wire or gobble
1781 * counts exceed some threshold.
1782 */
1783 if (vm_page_wire_count_warning > 0
1784 && vm_page_wire_count >= vm_page_wire_count_warning) {
1785 printf("mk: vm_page_grab(): high wired page count of %d\n",
1786 vm_page_wire_count);
1787 assert(vm_page_wire_count < vm_page_wire_count_warning);
1788 }
1789 if (vm_page_gobble_count_warning > 0
1790 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1791 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1792 vm_page_gobble_count);
1793 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1794 }
1795
1796 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1797
1798 /*
1799 * Only let privileged threads (involved in pageout)
1800 * dip into the reserved pool.
1801 */
1802 if ((vm_page_free_count < vm_page_free_reserved) &&
1803 !(current_thread()->options & TH_OPT_VMPRIV)) {
1804 lck_mtx_unlock(&vm_page_queue_free_lock);
1805 mem = VM_PAGE_NULL;
1806 }
1807 else {
1808 vm_page_t head;
1809 vm_page_t tail;
1810 unsigned int pages_to_steal;
1811 unsigned int color;
1812
1813 while ( vm_page_free_count == 0 ) {
1814
1815 lck_mtx_unlock(&vm_page_queue_free_lock);
1816 /*
1817 * must be a privileged thread to be
1818 * in this state since a non-privileged
1819 * thread would have bailed if we were
1820 * under the vm_page_free_reserved mark
1821 */
1822 VM_PAGE_WAIT();
1823 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1824 }
1825
1826 disable_preemption();
1827
1828 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1829 lck_mtx_unlock(&vm_page_queue_free_lock);
1830
1831 /*
1832 * we got preempted and moved to another processor
1833 * or we got preempted and someone else ran and filled the cache
1834 */
1835 goto return_page_from_cpu_list;
1836 }
1837 if (vm_page_free_count <= vm_page_free_reserved)
1838 pages_to_steal = 1;
1839 else {
1840 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1841
1842 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1843 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1844 }
1845 color = PROCESSOR_DATA(current_processor(), start_color);
1846 head = tail = NULL;
1847
1848 while (pages_to_steal--) {
1849 if (--vm_page_free_count < vm_page_free_count_minimum)
1850 vm_page_free_count_minimum = vm_page_free_count;
1851
1852 while (queue_empty(&vm_page_queue_free[color]))
1853 color = (color + 1) & vm_color_mask;
1854
1855 queue_remove_first(&vm_page_queue_free[color],
1856 mem,
1857 vm_page_t,
1858 pageq);
1859 mem->pageq.next = NULL;
1860 mem->pageq.prev = NULL;
1861
1862 color = (color + 1) & vm_color_mask;
1863
1864 if (head == NULL)
1865 head = mem;
1866 else
1867 tail->pageq.next = (queue_t)mem;
1868 tail = mem;
1869
1870 mem->pageq.prev = NULL;
1871 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1872 assert(mem->tabled == FALSE);
1873 assert(mem->object == VM_OBJECT_NULL);
1874 assert(!mem->laundry);
1875 assert(mem->free);
1876 mem->free = FALSE;
1877
1878 assert(pmap_verify_free(mem->phys_page));
1879 assert(mem->busy);
1880 assert(!mem->free);
1881 assert(!mem->encrypted);
1882 assert(!mem->pmapped);
1883 assert(!mem->wpmapped);
1884 }
1885 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1886 PROCESSOR_DATA(current_processor(), start_color) = color;
1887
1888 /*
1889 * satisfy this request
1890 */
1891 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1892 mem = head;
1893 mem->pageq.next = NULL;
1894
1895 lck_mtx_unlock(&vm_page_queue_free_lock);
1896
1897 enable_preemption();
1898 }
1899 /*
1900 * Decide if we should poke the pageout daemon.
1901 * We do this if the free count is less than the low
1902 * water mark, or if the free count is less than the high
1903 * water mark (but above the low water mark) and the inactive
1904 * count is less than its target.
1905 *
1906 * We don't have the counts locked ... if they change a little,
1907 * it doesn't really matter.
1908 */
1909 if ((vm_page_free_count < vm_page_free_min) ||
1910 ((vm_page_free_count < vm_page_free_target) &&
1911 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1912 thread_wakeup((event_t) &vm_page_free_wanted);
1913
1914 #if CONFIG_EMBEDDED
1915 {
1916 int percent_avail;
1917
1918 /*
1919 * Decide if we need to poke the memorystatus notification thread.
1920 */
1921 percent_avail =
1922 (vm_page_active_count + vm_page_inactive_count +
1923 vm_page_speculative_count + vm_page_free_count +
1924 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
1925 atop_64(max_mem);
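/*
 * Worked example (illustrative values only): with max_mem = 512MB, i.e.
 * atop_64(max_mem) = 131072 4K pages, and 26214 pages counted as
 * available above, percent_avail = 26214 * 100 / 131072 = 19 (integer
 * division), so the memorystatus thread is poked only if
 * kern_memorystatus_level was previously 24 or higher.
 */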
1926 if (percent_avail <= (kern_memorystatus_level - 5)) {
1927 kern_memorystatus_level = percent_avail;
1928 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1929 }
1930 }
1931 #endif
1932
1933 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1934
1935 return mem;
1936 }
1937
1938 /*
1939 * vm_page_release:
1940 *
1941 * Return a page to the free list.
1942 */
1943
1944 void
1945 vm_page_release(
1946 register vm_page_t mem)
1947 {
1948 unsigned int color;
1949 int need_wakeup = 0;
1950 int need_priv_wakeup = 0;
1951 #if 0
1952 unsigned int pindex;
1953 phys_entry *physent;
1954
1955 physent = mapping_phys_lookup(mem->phys_page, &pindex); /* (BRINGUP) */
1956 if(physent->ppLink & ppN) { /* (BRINGUP) */
1957 panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
1958 }
1959 physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */
1960 #endif
1961 assert(!mem->private && !mem->fictitious);
1962 if (vm_page_free_verify) {
1963 assert(pmap_verify_free(mem->phys_page));
1964 }
1965 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1966
1967
1968 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1969 #if DEBUG
1970 if (mem->free)
1971 panic("vm_page_release");
1972 #endif
1973 assert(mem->busy);
1974 assert(!mem->laundry);
1975 assert(mem->object == VM_OBJECT_NULL);
1976 assert(mem->pageq.next == NULL &&
1977 mem->pageq.prev == NULL);
1978 assert(mem->listq.next == NULL &&
1979 mem->listq.prev == NULL);
1980
1981 if ((mem->lopage || vm_lopage_refill == TRUE) &&
1982 vm_lopage_free_count < vm_lopage_free_limit &&
1983 mem->phys_page < max_valid_low_ppnum) {
1984 /*
1985 * this exists to support hardware controllers
1986 * incapable of generating DMAs with more than 32 bits
1987 * of address on platforms with physical memory > 4G...
1988 */
1989 queue_enter_first(&vm_lopage_queue_free,
1990 mem,
1991 vm_page_t,
1992 pageq);
1993 vm_lopage_free_count++;
1994
1995 if (vm_lopage_free_count >= vm_lopage_free_limit)
1996 vm_lopage_refill = FALSE;
1997
1998 mem->lopage = TRUE;
1999 } else {
2000 mem->lopage = FALSE;
2001 mem->free = TRUE;
2002
2003 color = mem->phys_page & vm_color_mask;
2004 queue_enter_first(&vm_page_queue_free[color],
2005 mem,
2006 vm_page_t,
2007 pageq);
2008 vm_page_free_count++;
2009 /*
2010 * Check if we should wake up someone waiting for a page.
2011 * But don't bother waking them unless they can allocate.
2012 *
2013 * We wake up only one thread, to prevent starvation.
2014 * Because the scheduling system handles wait queues FIFO,
2015 * if we wake up all waiting threads, one greedy thread
2016 * can starve multiple well-behaved threads. When the threads
2017 * all wake up, the greedy thread runs first, grabs the page,
2018 * and waits for another page. It will be the first to run
2019 * when the next page is freed.
2020 *
2021 * However, there is a slight danger here.
2022 * The thread we wake might not use the free page.
2023 * Then the other threads could wait indefinitely
2024 * while the page goes unused. To forestall this,
2025 * the pageout daemon will keep making free pages
2026 * as long as vm_page_free_wanted is non-zero.
2027 */
2028
2029 assert(vm_page_free_count > 0);
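/*
 * Privileged (pageout) waiters get first claim on this page; ordinary
 * waiters are only woken once the free count is back above the reserve.
 */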
2030 if (vm_page_free_wanted_privileged > 0) {
2031 vm_page_free_wanted_privileged--;
2032 need_priv_wakeup = 1;
2033 } else if (vm_page_free_wanted > 0 &&
2034 vm_page_free_count > vm_page_free_reserved) {
2035 vm_page_free_wanted--;
2036 need_wakeup = 1;
2037 }
2038 }
2039 lck_mtx_unlock(&vm_page_queue_free_lock);
2040
2041 if (need_priv_wakeup)
2042 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2043 else if (need_wakeup)
2044 thread_wakeup_one((event_t) &vm_page_free_count);
2045
2046 #if CONFIG_EMBEDDED
2047 {
2048 int percent_avail;
2049
2050 /*
2051 * Decide if we need to poke the memorystatus notification thread.
2052 * Locking is not a big issue, as only a single thread delivers these.
2053 */
2054 percent_avail =
2055 (vm_page_active_count + vm_page_inactive_count +
2056 vm_page_speculative_count + vm_page_free_count +
2057 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2058 atop_64(max_mem);
2059 if (percent_avail >= (kern_memorystatus_level + 5)) {
2060 kern_memorystatus_level = percent_avail;
2061 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2062 }
2063 }
2064 #endif
2065 }
2066
2067 /*
2068 * vm_page_wait:
2069 *
2070 * Wait for a page to become available.
2071 * If there are plenty of free pages, then we don't sleep.
2072 *
2073 * Returns:
2074 * TRUE: There may be another page, try again
2075 * FALSE: We were interrupted out of our wait, don't try again
2076 */
2077
2078 boolean_t
2079 vm_page_wait(
2080 int interruptible )
2081 {
2082 /*
2083 * We can't use vm_page_free_reserved to make this
2084 * determination. Consider: some thread might
2085 * need to allocate two pages. The first allocation
2086 * succeeds, the second fails. After the first page is freed,
2087 * a call to vm_page_wait must really block.
2088 */
2089 kern_return_t wait_result;
2090 int need_wakeup = 0;
2091 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2092
2093 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2094
2095 if (is_privileged && vm_page_free_count) {
2096 lck_mtx_unlock(&vm_page_queue_free_lock);
2097 return TRUE;
2098 }
2099 if (vm_page_free_count < vm_page_free_target) {
2100
2101 if (is_privileged) {
2102 if (vm_page_free_wanted_privileged++ == 0)
2103 need_wakeup = 1;
2104 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2105 } else {
2106 if (vm_page_free_wanted++ == 0)
2107 need_wakeup = 1;
2108 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2109 }
2110 lck_mtx_unlock(&vm_page_queue_free_lock);
2111 counter(c_vm_page_wait_block++);
2112
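/*
 * This was the first thread to queue up for a free page, so poke the
 * pageout daemon (which sleeps on &vm_page_free_wanted) to start
 * producing free pages.
 */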
2113 if (need_wakeup)
2114 thread_wakeup((event_t)&vm_page_free_wanted);
2115
2116 if (wait_result == THREAD_WAITING)
2117 wait_result = thread_block(THREAD_CONTINUE_NULL);
2118
2119 return(wait_result == THREAD_AWAKENED);
2120 } else {
2121 lck_mtx_unlock(&vm_page_queue_free_lock);
2122 return TRUE;
2123 }
2124 }
2125
2126 /*
2127 * vm_page_alloc:
2128 *
2129 * Allocate and return a memory cell associated
2130 * with this VM object/offset pair.
2131 *
2132 * Object must be locked.
2133 */
2134
2135 vm_page_t
2136 vm_page_alloc(
2137 vm_object_t object,
2138 vm_object_offset_t offset)
2139 {
2140 register vm_page_t mem;
2141
2142 vm_object_lock_assert_exclusive(object);
2143 mem = vm_page_grab();
2144 if (mem == VM_PAGE_NULL)
2145 return VM_PAGE_NULL;
2146
2147 vm_page_insert(mem, object, offset);
2148
2149 return(mem);
2150 }
2151
2152 vm_page_t
2153 vm_page_alloclo(
2154 vm_object_t object,
2155 vm_object_offset_t offset)
2156 {
2157 register vm_page_t mem;
2158
2159 vm_object_lock_assert_exclusive(object);
2160 mem = vm_page_grablo();
2161 if (mem == VM_PAGE_NULL)
2162 return VM_PAGE_NULL;
2163
2164 vm_page_insert(mem, object, offset);
2165
2166 return(mem);
2167 }
2168
2169
2170 /*
2171 * vm_page_alloc_guard:
2172 *
2173 * Allocate a fictitious page which will be used
2174 * as a guard page. The page will be inserted into
2175 * the object and returned to the caller.
2176 */
2177
2178 vm_page_t
2179 vm_page_alloc_guard(
2180 vm_object_t object,
2181 vm_object_offset_t offset)
2182 {
2183 register vm_page_t mem;
2184
2185 vm_object_lock_assert_exclusive(object);
2186 mem = vm_page_grab_guard();
2187 if (mem == VM_PAGE_NULL)
2188 return VM_PAGE_NULL;
2189
2190 vm_page_insert(mem, object, offset);
2191
2192 return(mem);
2193 }
2194
2195
2196 counter(unsigned int c_laundry_pages_freed = 0;)
2197
2198 /*
2199 * vm_page_free:
2200 *
2201 * Returns the given page to the free list,
2202 * disassociating it from any VM object.
2203 *
2204 * Object and page queues must be locked prior to entry.
2205 */
2206 static void
2207 vm_page_free_prepare(
2208 register vm_page_t mem)
2209 {
2210 vm_page_free_prepare_queues(mem);
2211 vm_page_free_prepare_object(mem, TRUE);
2212 }
2213
2214
2215 void
2216 vm_page_free_prepare_queues(
2217 vm_page_t mem)
2218 {
2219 VM_PAGE_CHECK(mem);
2220 assert(!mem->free);
2221 assert(!mem->cleaning);
2222 assert(!mem->pageout);
2223 #if DEBUG
2224 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2225 if (mem->free)
2226 panic("vm_page_free: freeing page on free list\n");
2227 #endif
2228 if (mem->object) {
2229 vm_object_lock_assert_exclusive(mem->object);
2230 }
2231
2232 if (mem->laundry) {
2233 /*
2234 * We may have to free a page while it's being laundered
2235 * if we lost its pager (due to a forced unmount, for example).
2236 * We need to call vm_pageout_throttle_up() before removing
2237 * the page from its VM object, so that we can find out on
2238 * which pageout queue the page is on.
2239 */
2240 vm_pageout_throttle_up(mem);
2241 counter(++c_laundry_pages_freed);
2242 }
2243 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2244
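/*
 * Undo the wired/gobbled accounting: a wired page drops its object's
 * wired_page_count and the global vm_page_wire_count; a gobbled page was
 * counted as wired when it was gobbled, so drop both the wire and gobble
 * counts for it.
 */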
2245 if (VM_PAGE_WIRED(mem)) {
2246 if (mem->object) {
2247 assert(mem->object->wired_page_count > 0);
2248 mem->object->wired_page_count--;
2249 assert(mem->object->resident_page_count >=
2250 mem->object->wired_page_count);
2251 }
2252 if (!mem->private && !mem->fictitious)
2253 vm_page_wire_count--;
2254 mem->wire_count = 0;
2255 assert(!mem->gobbled);
2256 } else if (mem->gobbled) {
2257 if (!mem->private && !mem->fictitious)
2258 vm_page_wire_count--;
2259 vm_page_gobble_count--;
2260 }
2261 }
2262
2263
2264 void
2265 vm_page_free_prepare_object(
2266 vm_page_t mem,
2267 boolean_t remove_from_hash)
2268 {
2269 if (mem->object) {
2270 vm_object_lock_assert_exclusive(mem->object);
2271 }
2272
2273 if (mem->tabled)
2274 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2275
2276 PAGE_WAKEUP(mem); /* clears wanted */
2277
2278 if (mem->private) {
2279 mem->private = FALSE;
2280 mem->fictitious = TRUE;
2281 mem->phys_page = vm_page_fictitious_addr;
2282 }
2283 if (mem->fictitious) {
2284 /* Some of these may be unnecessary */
2285 mem->gobbled = FALSE;
2286 mem->busy = TRUE;
2287 mem->absent = FALSE;
2288 mem->error = FALSE;
2289 mem->dirty = FALSE;
2290 mem->precious = FALSE;
2291 mem->reference = FALSE;
2292 mem->encrypted = FALSE;
2293 mem->encrypted_cleaning = FALSE;
2294 mem->pmapped = FALSE;
2295 mem->wpmapped = FALSE;
2296 mem->reusable = FALSE;
2297 } else {
2298 if (mem->zero_fill == TRUE)
2299 VM_ZF_COUNT_DECR();
2300 vm_page_init(mem, mem->phys_page, mem->lopage);
2301 }
2302 }
2303
2304
2305 void
2306 vm_page_free(
2307 vm_page_t mem)
2308 {
2309 vm_page_free_prepare(mem);
2310 if (mem->fictitious) {
2311 vm_page_release_fictitious(mem);
2312 } else {
2313 vm_page_release(mem);
2314 }
2315 }
2316
2317
2318 void
2319 vm_page_free_unlocked(
2320 vm_page_t mem,
2321 boolean_t remove_from_hash)
2322 {
2323 vm_page_lockspin_queues();
2324 vm_page_free_prepare_queues(mem);
2325 vm_page_unlock_queues();
2326
2327 vm_page_free_prepare_object(mem, remove_from_hash);
2328
2329 if (mem->fictitious) {
2330 vm_page_release_fictitious(mem);
2331 } else {
2332 vm_page_release(mem);
2333 }
2334 }
2335
2336 /*
2337 * Free a list of pages. The list can be up to several hundred pages,
2338 * as blocked up by vm_pageout_scan().
2339 * The big win is not having to take the free list lock once
2340 * per page. We sort the incoming pages into n lists, one for
2341 * each color.
2342 */
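/*
 * Worked example (illustrative values): assuming vm_colors = 8, so
 * vm_color_mask = 7, a page with phys_page 0x12345 is bucketed into
 * free_list[0x12345 & 7] = free_list[5]. Each non-empty bucket is later
 * spliced onto the matching global free queue while holding
 * vm_page_queue_free_lock just once for the whole list.
 */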
2343 void
2344 vm_page_free_list(
2345 vm_page_t mem,
2346 boolean_t prepare_object)
2347 {
2348 vm_page_t nxt;
2349 int pg_count = 0;
2350 int color;
2351 int inuse_list_head = -1;
2352
2353 queue_head_t free_list[MAX_COLORS];
2354 int inuse[MAX_COLORS];
2355
2356 for (color = 0; color < (signed) vm_colors; color++) {
2357 queue_init(&free_list[color]);
2358 }
2359
2360 while (mem) {
2361 assert(!mem->inactive);
2362 assert(!mem->active);
2363 assert(!mem->throttled);
2364 assert(!mem->free);
2365 assert(!mem->speculative);
2366 assert(!VM_PAGE_WIRED(mem));
2367 assert(mem->pageq.prev == NULL);
2368
2369 nxt = (vm_page_t)(mem->pageq.next);
2370
2371 if (prepare_object == TRUE)
2372 vm_page_free_prepare_object(mem, TRUE);
2373
2374 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2375 assert(pmap_verify_free(mem->phys_page));
2376 }
2377 assert(mem->busy);
2378
2379 if (!mem->fictitious) {
2380 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2381 vm_lopage_free_count < vm_lopage_free_limit &&
2382 mem->phys_page < max_valid_low_ppnum) {
2383 mem->pageq.next = NULL;
2384 vm_page_release(mem);
2385 } else {
2386
2387 /*
2388 * IMPORTANT: we can't set the page "free" here
2389 * because that would make the page eligible for
2390 * a physically-contiguous allocation (see
2391 * vm_page_find_contiguous()) right away (we don't
2392 * hold the vm_page_queue_free lock). That would
2393 * cause trouble because the page is not actually
2394 * in the free queue yet...
2395 */
2396 color = mem->phys_page & vm_color_mask;
2397 if (queue_empty(&free_list[color])) {
2398 inuse[color] = inuse_list_head;
2399 inuse_list_head = color;
2400 }
2401 queue_enter_first(&free_list[color],
2402 mem,
2403 vm_page_t,
2404 pageq);
2405 pg_count++;
2406 }
2407 } else {
2408 assert(mem->phys_page == vm_page_fictitious_addr ||
2409 mem->phys_page == vm_page_guard_addr);
2410 vm_page_release_fictitious(mem);
2411 }
2412 mem = nxt;
2413 }
2414 if (pg_count) {
2415 unsigned int avail_free_count;
2416 unsigned int need_wakeup = 0;
2417 unsigned int need_priv_wakeup = 0;
2418
2419 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2420
2421 color = inuse_list_head;
2422
2423 while( color != -1 ) {
2424 vm_page_t first, last;
2425 vm_page_t first_free;
2426
2427 /*
2428 * Now that we hold the vm_page_queue_free lock,
2429 * it's safe to mark all pages in our local queue
2430 * as "free"...
2431 */
2432 queue_iterate(&free_list[color],
2433 mem,
2434 vm_page_t,
2435 pageq) {
2436 assert(!mem->free);
2437 assert(mem->busy);
2438 mem->free = TRUE;
2439 }
2440
2441 /*
2442 * ... and insert our local queue at the head of
2443 * the global free queue.
2444 */
2445 first = (vm_page_t) queue_first(&free_list[color]);
2446 last = (vm_page_t) queue_last(&free_list[color]);
2447 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2448 if (queue_empty(&vm_page_queue_free[color])) {
2449 queue_last(&vm_page_queue_free[color]) =
2450 (queue_entry_t) last;
2451 } else {
2452 queue_prev(&first_free->pageq) =
2453 (queue_entry_t) last;
2454 }
2455 queue_first(&vm_page_queue_free[color]) =
2456 (queue_entry_t) first;
2457 queue_prev(&first->pageq) =
2458 (queue_entry_t) &vm_page_queue_free[color];
2459 queue_next(&last->pageq) =
2460 (queue_entry_t) first_free;
2461
2462 /* next color */
2463 color = inuse[color];
2464 }
2465
2466 vm_page_free_count += pg_count;
2467 avail_free_count = vm_page_free_count;
2468
2469 if (vm_page_free_wanted_privileged > 0 &&
2470 avail_free_count > 0) {
2471 if (avail_free_count < vm_page_free_wanted_privileged) {
2472 need_priv_wakeup = avail_free_count;
2473 vm_page_free_wanted_privileged -=
2474 avail_free_count;
2475 avail_free_count = 0;
2476 } else {
2477 need_priv_wakeup = vm_page_free_wanted_privileged;
2478 vm_page_free_wanted_privileged = 0;
2479 avail_free_count -=
2480 need_priv_wakeup;
2481 }
2482 }
2483
2484 if (vm_page_free_wanted > 0 &&
2485 avail_free_count > vm_page_free_reserved) {
2486 unsigned int available_pages;
2487
2488 available_pages = (avail_free_count -
2489 vm_page_free_reserved);
2490
2491 if (available_pages >= vm_page_free_wanted) {
2492 need_wakeup = vm_page_free_wanted;
2493 vm_page_free_wanted = 0;
2494 } else {
2495 need_wakeup = available_pages;
2496 vm_page_free_wanted -= available_pages;
2497 }
2498 }
2499 lck_mtx_unlock(&vm_page_queue_free_lock);
2500
2501 if (need_priv_wakeup != 0) {
2502 /*
2503 * There shouldn't be that many VM-privileged threads,
2504 * so let's wake them all up, even if we don't quite
2505 * have enough pages to satisfy them all.
2506 */
2507 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2508 }
2509 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2510 /*
2511 * We don't expect to have any more waiters
2512 * after this, so let's wake them all up at
2513 * once.
2514 */
2515 thread_wakeup((event_t) &vm_page_free_count);
2516 } else for (; need_wakeup != 0; need_wakeup--) {
2517 /*
2518 * Wake up one waiter per page we just released.
2519 */
2520 thread_wakeup_one((event_t) &vm_page_free_count);
2521 }
2522 #if CONFIG_EMBEDDED
2523 {
2524 int percent_avail;
2525
2526 /*
2527 * Decide if we need to poke the memorystatus notification thread.
2528 */
2529 percent_avail =
2530 (vm_page_active_count + vm_page_inactive_count +
2531 vm_page_speculative_count + vm_page_free_count +
2532 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2533 atop_64(max_mem);
2534 if (percent_avail >= (kern_memorystatus_level + 5)) {
2535 kern_memorystatus_level = percent_avail;
2536 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2537 }
2538 }
2539 #endif
2540 }
2541 }
2542
2543
2544 /*
2545 * vm_page_wire:
2546 *
2547 * Mark this page as wired down by yet
2548 * another map, removing it from paging queues
2549 * as necessary.
2550 *
2551 * The page's object and the page queues must be locked.
2552 */
2553 void
2554 vm_page_wire(
2555 register vm_page_t mem)
2556 {
2557
2558 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2559
2560 VM_PAGE_CHECK(mem);
2561 if (mem->object) {
2562 vm_object_lock_assert_exclusive(mem->object);
2563 } else {
2564 /*
2565 * In theory, the page should be in an object before it
2566 * gets wired, since we need to hold the object lock
2567 * to update some fields in the page structure.
2568 * However, some code (i386 pmap, for example) might want
2569 * to wire a page before it gets inserted into an object.
2570 * That's somewhat OK, as long as nobody else can get to
2571 * that page and update it at the same time.
2572 */
2573 }
2574 #if DEBUG
2575 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2576 #endif
2577 if ( !VM_PAGE_WIRED(mem)) {
2578 VM_PAGE_QUEUES_REMOVE(mem);
2579
2580 if (mem->object) {
2581 mem->object->wired_page_count++;
2582 assert(mem->object->resident_page_count >=
2583 mem->object->wired_page_count);
2584 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2585 assert(vm_page_purgeable_count > 0);
2586 OSAddAtomic(-1, &vm_page_purgeable_count);
2587 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2588 }
2589 if (mem->object->all_reusable) {
2590 /*
2591 * Wired pages are not counted as "re-usable"
2592 * in "all_reusable" VM objects, so nothing
2593 * to do here.
2594 */
2595 } else if (mem->reusable) {
2596 /*
2597 * This page is not "re-usable" when it's
2598 * wired, so adjust its state and the
2599 * accounting.
2600 */
2601 vm_object_reuse_pages(mem->object,
2602 mem->offset,
2603 mem->offset+PAGE_SIZE_64,
2604 FALSE);
2605 }
2606 }
2607 assert(!mem->reusable);
2608
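/*
 * A gobbled page was already counted in vm_page_wire_count when it was
 * gobbled, so don't count it again here; just convert it from gobbled
 * to wired.
 */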
2609 if (!mem->private && !mem->fictitious && !mem->gobbled)
2610 vm_page_wire_count++;
2611 if (mem->gobbled)
2612 vm_page_gobble_count--;
2613 mem->gobbled = FALSE;
2614 if (mem->zero_fill == TRUE) {
2615 mem->zero_fill = FALSE;
2616 VM_ZF_COUNT_DECR();
2617 }
2618 #if CONFIG_EMBEDDED
2619 {
2620 int percent_avail;
2621
2622 /*
2623 * Decide if we need to poke the memorystatus notification thread.
2624 */
2625 percent_avail =
2626 (vm_page_active_count + vm_page_inactive_count +
2627 vm_page_speculative_count + vm_page_free_count +
2628 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2629 atop_64(max_mem);
2630 if (percent_avail <= (kern_memorystatus_level - 5)) {
2631 kern_memorystatus_level = percent_avail;
2632 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2633 }
2634 }
2635 #endif
2636 /*
2637 * ENCRYPTED SWAP:
2638 * The page could be encrypted, but
2639 * We don't have to decrypt it here
2640 * because we don't guarantee that the
2641 * data is actually valid at this point.
2642 * The page will get decrypted in
2643 * vm_fault_wire() if needed.
2644 */
2645 }
2646 assert(!mem->gobbled);
2647 mem->wire_count++;
2648 VM_PAGE_CHECK(mem);
2649 }
2650
2651 /*
2652 * vm_page_gobble:
2653 *
2654 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2655 *
2656 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2657 */
2658 void
2659 vm_page_gobble(
2660 register vm_page_t mem)
2661 {
2662 vm_page_lockspin_queues();
2663 VM_PAGE_CHECK(mem);
2664
2665 assert(!mem->gobbled);
2666 assert( !VM_PAGE_WIRED(mem));
2667
2668 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2669 if (!mem->private && !mem->fictitious)
2670 vm_page_wire_count++;
2671 }
2672 vm_page_gobble_count++;
2673 mem->gobbled = TRUE;
2674 vm_page_unlock_queues();
2675 }
2676
2677 /*
2678 * vm_page_unwire:
2679 *
2680 * Release one wiring of this page, potentially
2681 * enabling it to be paged again.
2682 *
2683 * The page's object and the page queues must be locked.
2684 */
2685 void
2686 vm_page_unwire(
2687 vm_page_t mem,
2688 boolean_t queueit)
2689 {
2690
2691 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2692
2693 VM_PAGE_CHECK(mem);
2694 assert(VM_PAGE_WIRED(mem));
2695 assert(mem->object != VM_OBJECT_NULL);
2696 #if DEBUG
2697 vm_object_lock_assert_exclusive(mem->object);
2698 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2699 #endif
2700 if (--mem->wire_count == 0) {
2701 assert(!mem->private && !mem->fictitious);
2702 vm_page_wire_count--;
2703 assert(mem->object->wired_page_count > 0);
2704 mem->object->wired_page_count--;
2705 assert(mem->object->resident_page_count >=
2706 mem->object->wired_page_count);
2707 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2708 OSAddAtomic(+1, &vm_page_purgeable_count);
2709 assert(vm_page_purgeable_wired_count > 0);
2710 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2711 }
2712 assert(!mem->laundry);
2713 assert(mem->object != kernel_object);
2714 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2715
2716 if (queueit == TRUE) {
2717 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2718 vm_page_deactivate(mem);
2719 } else {
2720 vm_page_activate(mem);
2721 }
2722 }
2723 #if CONFIG_EMBEDDED
2724 {
2725 int percent_avail;
2726
2727 /*
2728 * Decide if we need to poke the memorystatus notification thread.
2729 */
2730 percent_avail =
2731 (vm_page_active_count + vm_page_inactive_count +
2732 vm_page_speculative_count + vm_page_free_count +
2733 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2734 atop_64(max_mem);
2735 if (percent_avail >= (kern_memorystatus_level + 5)) {
2736 kern_memorystatus_level = percent_avail;
2737 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2738 }
2739 }
2740 #endif
2741 }
2742 VM_PAGE_CHECK(mem);
2743 }
2744
2745 /*
2746 * vm_page_deactivate:
2747 *
2748 * Returns the given page to the inactive list,
2749 * indicating that no physical maps have access
2750 * to this page. [Used by the physical mapping system.]
2751 *
2752 * The page queues must be locked.
2753 */
2754 void
2755 vm_page_deactivate(
2756 vm_page_t m)
2757 {
2758 vm_page_deactivate_internal(m, TRUE);
2759 }
2760
2761
2762 void
2763 vm_page_deactivate_internal(
2764 vm_page_t m,
2765 boolean_t clear_hw_reference)
2766 {
2767
2768 VM_PAGE_CHECK(m);
2769 assert(m->object != kernel_object);
2770 assert(m->phys_page != vm_page_guard_addr);
2771
2772 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2773 #if DEBUG
2774 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2775 #endif
2776 /*
2777 * This page is no longer very interesting. If it was
2778 * interesting (active or inactive/referenced), then we
2779 * clear the reference bit and (re)enter it in the
2780 * inactive queue. Note wired pages should not have
2781 * their reference bit cleared.
2782 */
2783
2784 if (m->absent && !m->unusual)
2785 panic("vm_page_deactivate: %p absent", m);
2786
2787 if (m->gobbled) { /* can this happen? */
2788 assert( !VM_PAGE_WIRED(m));
2789
2790 if (!m->private && !m->fictitious)
2791 vm_page_wire_count--;
2792 vm_page_gobble_count--;
2793 m->gobbled = FALSE;
2794 }
2795 if (m->private || (VM_PAGE_WIRED(m)))
2796 return;
2797
2798 if (!m->fictitious && !m->absent && clear_hw_reference == TRUE)
2799 pmap_clear_reference(m->phys_page);
2800
2801 m->reference = FALSE;
2802 m->no_cache = FALSE;
2803
2804 if (!m->inactive) {
2805 VM_PAGE_QUEUES_REMOVE(m);
2806
2807 assert(!m->laundry);
2808 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2809
2810 if (!IP_VALID(memory_manager_default) &&
2811 m->dirty && m->object->internal &&
2812 (m->object->purgable == VM_PURGABLE_DENY ||
2813 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2814 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2815 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2816 m->throttled = TRUE;
2817 vm_page_throttled_count++;
2818 } else {
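/*
 * A page of a named object with only one reference goes back onto the
 * speculative queue rather than the inactive queue (tracked as
 * vm_page_speculative_recreated on DEVELOPMENT/DEBUG kernels).
 */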
2819 if (!m->fictitious && m->object->named && m->object->ref_count == 1) {
2820 vm_page_speculate(m, FALSE);
2821 #if DEVELOPMENT || DEBUG
2822 vm_page_speculative_recreated++;
2823 #endif
2824 return;
2825 } else {
2826 if (m->zero_fill) {
2827 queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
2828 vm_zf_queue_count++;
2829 } else {
2830 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2831 }
2832 }
2833 m->inactive = TRUE;
2834 if (!m->fictitious) {
2835 vm_page_inactive_count++;
2836 token_new_pagecount++;
2837 }
2838 }
2839 }
2840 }
2841
2842 /*
2843 * vm_page_activate:
2844 *
2845 * Put the specified page on the active list (if appropriate).
2846 *
2847 * The page queues must be locked.
2848 */
2849
2850 void
2851 vm_page_activate(
2852 register vm_page_t m)
2853 {
2854 VM_PAGE_CHECK(m);
2855 #ifdef FIXME_4778297
2856 assert(m->object != kernel_object);
2857 #endif
2858 assert(m->phys_page != vm_page_guard_addr);
2859 #if DEBUG
2860 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2861 #endif
2862
2863 if (m->absent && !m->unusual)
2864 panic("vm_page_activate: %p absent", m);
2865
2866 if (m->gobbled) {
2867 assert( !VM_PAGE_WIRED(m));
2868 if (!m->private && !m->fictitious)
2869 vm_page_wire_count--;
2870 vm_page_gobble_count--;
2871 m->gobbled = FALSE;
2872 }
2873 if (m->private)
2874 return;
2875
2876 #if DEBUG
2877 if (m->active)
2878 panic("vm_page_activate: already active");
2879 #endif
2880
2881 if (m->speculative) {
2882 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2883 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2884 }
2885
2886 VM_PAGE_QUEUES_REMOVE(m);
2887
2888 if ( !VM_PAGE_WIRED(m)) {
2889 assert(!m->laundry);
2890 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2891 if (!IP_VALID(memory_manager_default) &&
2892 !m->fictitious && m->dirty && m->object->internal &&
2893 (m->object->purgable == VM_PURGABLE_DENY ||
2894 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2895 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2896 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2897 m->throttled = TRUE;
2898 vm_page_throttled_count++;
2899 } else {
2900 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2901 m->active = TRUE;
2902 if (!m->fictitious)
2903 vm_page_active_count++;
2904 }
2905 m->reference = TRUE;
2906 m->no_cache = FALSE;
2907 }
2908 VM_PAGE_CHECK(m);
2909 }
2910
2911
2912 /*
2913 * vm_page_speculate:
2914 *
2915 * Put the specified page on the speculative list (if appropriate).
2916 *
2917 * The page queues must be locked.
2918 */
2919 void
2920 vm_page_speculate(
2921 vm_page_t m,
2922 boolean_t new)
2923 {
2924 struct vm_speculative_age_q *aq;
2925
2926 VM_PAGE_CHECK(m);
2927 assert(m->object != kernel_object);
2928 assert(m->phys_page != vm_page_guard_addr);
2929 #if DEBUG
2930 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2931 #endif
2932
2933 if (m->absent && !m->unusual)
2934 panic("vm_page_speculate: %p absent", m);
2935
2936 VM_PAGE_QUEUES_REMOVE(m);
2937
2938 if ( !VM_PAGE_WIRED(m)) {
2939 mach_timespec_t ts;
2940 clock_sec_t sec;
2941 clock_nsec_t nsec;
2942
2943 clock_get_system_nanotime(&sec, &nsec);
2944 ts.tv_sec = (unsigned int) sec;
2945 ts.tv_nsec = nsec;
2946
2947 if (vm_page_speculative_count == 0) {
2948
2949 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2950 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2951
2952 aq = &vm_page_queue_speculative[speculative_age_index];
2953
2954 /*
2955 * set the timer to begin a new group
2956 */
2957 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2958 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2959
2960 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2961 } else {
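/*
 * Speculative pages already exist, so there is a current age bin.
 * If its deadline has passed, advance to the next bin (wrapping around),
 * push the steal index out of the way if it collides, flush any pages
 * still sitting in the new bin to the aged queue, and restart the bin's
 * timer.
 */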
2962 aq = &vm_page_queue_speculative[speculative_age_index];
2963
2964 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2965
2966 speculative_age_index++;
2967
2968 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2969 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2970 if (speculative_age_index == speculative_steal_index) {
2971 speculative_steal_index = speculative_age_index + 1;
2972
2973 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2974 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2975 }
2976 aq = &vm_page_queue_speculative[speculative_age_index];
2977
2978 if (!queue_empty(&aq->age_q))
2979 vm_page_speculate_ageit(aq);
2980
2981 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2982 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2983
2984 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2985 }
2986 }
2987 enqueue_tail(&aq->age_q, &m->pageq);
2988 m->speculative = TRUE;
2989 vm_page_speculative_count++;
2990
2991 if (new == TRUE) {
2992 m->object->pages_created++;
2993 #if DEVELOPMENT || DEBUG
2994 vm_page_speculative_created++;
2995 #endif
2996 }
2997 }
2998 VM_PAGE_CHECK(m);
2999 }
3000
3001
3002 /*
3003 * move pages from the specified aging bin to
3004 * the speculative bin that pageout_scan claims from
3005 *
3006 * The page queues must be locked.
3007 */
3008 void
3009 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3010 {
3011 struct vm_speculative_age_q *sq;
3012 vm_page_t t;
3013
3014 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3015
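/*
 * Append the aging bin's pages to the tail of the aged queue: if the
 * aged queue is empty, adopt the bin's list wholesale; otherwise link
 * the bin's first page after the aged queue's current last page and
 * patch up the head/tail pointers.
 */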
3016 if (queue_empty(&sq->age_q)) {
3017 sq->age_q.next = aq->age_q.next;
3018 sq->age_q.prev = aq->age_q.prev;
3019
3020 t = (vm_page_t)sq->age_q.next;
3021 t->pageq.prev = &sq->age_q;
3022
3023 t = (vm_page_t)sq->age_q.prev;
3024 t->pageq.next = &sq->age_q;
3025 } else {
3026 t = (vm_page_t)sq->age_q.prev;
3027 t->pageq.next = aq->age_q.next;
3028
3029 t = (vm_page_t)aq->age_q.next;
3030 t->pageq.prev = sq->age_q.prev;
3031
3032 t = (vm_page_t)aq->age_q.prev;
3033 t->pageq.next = &sq->age_q;
3034
3035 sq->age_q.prev = aq->age_q.prev;
3036 }
3037 queue_init(&aq->age_q);
3038 }
3039
3040
3041 void
3042 vm_page_lru(
3043 vm_page_t m)
3044 {
3045 VM_PAGE_CHECK(m);
3046 assert(m->object != kernel_object);
3047 assert(m->phys_page != vm_page_guard_addr);
3048
3049 #if DEBUG
3050 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3051 #endif
3052 if (m->active || m->reference)
3053 return;
3054
3055 if (m->private || (VM_PAGE_WIRED(m)))
3056 return;
3057
3058 m->no_cache = FALSE;
3059
3060 VM_PAGE_QUEUES_REMOVE(m);
3061
3062 assert(!m->laundry);
3063 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
3064
3065 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
3066 m->inactive = TRUE;
3067
3068 vm_page_inactive_count++;
3069 token_new_pagecount++;
3070 }
3071
3072
3073 void
3074 vm_page_reactivate_all_throttled(void)
3075 {
3076 vm_page_t first_throttled, last_throttled;
3077 vm_page_t first_active;
3078 vm_page_t m;
3079 int extra_active_count;
3080
3081 extra_active_count = 0;
3082 vm_page_lock_queues();
3083 if (! queue_empty(&vm_page_queue_throttled)) {
3084 /*
3085 * Switch "throttled" pages to "active".
3086 */
3087 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3088 VM_PAGE_CHECK(m);
3089 assert(m->throttled);
3090 assert(!m->active);
3091 assert(!m->inactive);
3092 assert(!m->speculative);
3093 assert(!VM_PAGE_WIRED(m));
3094 if (!m->fictitious) {
3095 extra_active_count++;
3096 }
3097 m->throttled = FALSE;
3098 m->active = TRUE;
3099 VM_PAGE_CHECK(m);
3100 }
3101
3102 /*
3103 * Transfer the entire throttled queue to a regular LRU page queue.
3104 * We insert it at the head of the active queue, so that these pages
3105 * get re-evaluated by the LRU algorithm first, since they've been
3106 * completely out of it until now.
3107 */
3108 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3109 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3110 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3111 if (queue_empty(&vm_page_queue_active)) {
3112 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3113 } else {
3114 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3115 }
3116 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3117 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3118 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3119
3120 #if DEBUG
3121 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3122 #endif
3123 queue_init(&vm_page_queue_throttled);
3124 /*
3125 * Adjust the global page counts.
3126 */
3127 vm_page_active_count += extra_active_count;
3128 vm_page_throttled_count = 0;
3129 }
3130 assert(vm_page_throttled_count == 0);
3131 assert(queue_empty(&vm_page_queue_throttled));
3132 vm_page_unlock_queues();
3133 }
3134
3135
3136 /*
3137 * move pages from the indicated local queue to the global active queue
3138 * it's OK to fail if we're below the hard limit and force == FALSE
3139 * the nolocks == TRUE case is to allow this function to be run on
3140 * the hibernate path
3141 */
3142
3143 void
3144 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3145 {
3146 struct vpl *lq;
3147 vm_page_t first_local, last_local;
3148 vm_page_t first_active;
3149 vm_page_t m;
3150 uint32_t count = 0;
3151
3152 if (vm_page_local_q == NULL)
3153 return;
3154
3155 lq = &vm_page_local_q[lid].vpl_un.vpl;
3156
3157 if (nolocks == FALSE) {
3158 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3159 if ( !vm_page_trylockspin_queues())
3160 return;
3161 } else
3162 vm_page_lockspin_queues();
3163
3164 VPL_LOCK(&lq->vpl_lock);
3165 }
3166 if (lq->vpl_count) {
3167 /*
3168 * Switch "local" pages to "active".
3169 */
3170 assert(!queue_empty(&lq->vpl_queue));
3171
3172 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3173 VM_PAGE_CHECK(m);
3174 assert(m->local);
3175 assert(!m->active);
3176 assert(!m->inactive);
3177 assert(!m->speculative);
3178 assert(!VM_PAGE_WIRED(m));
3179 assert(!m->throttled);
3180 assert(!m->fictitious);
3181
3182 if (m->local_id != lid)
3183 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3184
3185 m->local_id = 0;
3186 m->local = FALSE;
3187 m->active = TRUE;
3188 VM_PAGE_CHECK(m);
3189
3190 count++;
3191 }
3192 if (count != lq->vpl_count)
3193 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3194
3195 /*
3196 * Transfer the entire local queue to a regular LRU page queue.
3197 */
3198 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3199 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3200 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3201
3202 if (queue_empty(&vm_page_queue_active)) {
3203 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3204 } else {
3205 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3206 }
3207 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3208 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3209 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3210
3211 queue_init(&lq->vpl_queue);
3212 /*
3213 * Adjust the global page counts.
3214 */
3215 vm_page_active_count += lq->vpl_count;
3216 lq->vpl_count = 0;
3217 }
3218 assert(queue_empty(&lq->vpl_queue));
3219
3220 if (nolocks == FALSE) {
3221 VPL_UNLOCK(&lq->vpl_lock);
3222 vm_page_unlock_queues();
3223 }
3224 }
3225
3226 /*
3227 * vm_page_part_zero_fill:
3228 *
3229 * Zero-fill a part of the page.
3230 */
3231 void
3232 vm_page_part_zero_fill(
3233 vm_page_t m,
3234 vm_offset_t m_pa,
3235 vm_size_t len)
3236 {
3237 vm_page_t tmp;
3238
3239 VM_PAGE_CHECK(m);
3240 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3241 pmap_zero_part_page(m->phys_page, m_pa, len);
3242 #else
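/*
 * No pmap primitive for partial zeroing: grab a scratch page, zero it,
 * copy the bytes outside [m_pa, m_pa + len) over from the original page,
 * then copy the whole scratch page back onto the original.
 */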
3243 while (1) {
3244 tmp = vm_page_grab();
3245 if (tmp == VM_PAGE_NULL) {
3246 vm_page_wait(THREAD_UNINT);
3247 continue;
3248 }
3249 break;
3250 }
3251 vm_page_zero_fill(tmp);
3252 if(m_pa != 0) {
3253 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3254 }
3255 if((m_pa + len) < PAGE_SIZE) {
3256 vm_page_part_copy(m, m_pa + len, tmp,
3257 m_pa + len, PAGE_SIZE - (m_pa + len));
3258 }
3259 vm_page_copy(tmp,m);
3260 VM_PAGE_FREE(tmp);
3261 #endif
3262
3263 }
3264
3265 /*
3266 * vm_page_zero_fill:
3267 *
3268 * Zero-fill the specified page.
3269 */
3270 void
3271 vm_page_zero_fill(
3272 vm_page_t m)
3273 {
3274 XPR(XPR_VM_PAGE,
3275 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3276 m->object, m->offset, m, 0,0);
3277
3278 VM_PAGE_CHECK(m);
3279
3280 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3281 pmap_zero_page(m->phys_page);
3282 }
3283
3284 /*
3285 * vm_page_part_copy:
3286 *
3287 * copy part of one page to another
3288 */
3289
3290 void
3291 vm_page_part_copy(
3292 vm_page_t src_m,
3293 vm_offset_t src_pa,
3294 vm_page_t dst_m,
3295 vm_offset_t dst_pa,
3296 vm_size_t len)
3297 {
3298 VM_PAGE_CHECK(src_m);
3299 VM_PAGE_CHECK(dst_m);
3300
3301 pmap_copy_part_page(src_m->phys_page, src_pa,
3302 dst_m->phys_page, dst_pa, len);
3303 }
3304
3305 /*
3306 * vm_page_copy:
3307 *
3308 * Copy one page to another
3309 *
3310 * ENCRYPTED SWAP:
3311 * The source page should not be encrypted. The caller should
3312 * make sure the page is decrypted first, if necessary.
3313 */
3314
3315 int vm_page_copy_cs_validations = 0;
3316 int vm_page_copy_cs_tainted = 0;
3317
3318 void
3319 vm_page_copy(
3320 vm_page_t src_m,
3321 vm_page_t dest_m)
3322 {
3323 XPR(XPR_VM_PAGE,
3324 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3325 src_m->object, src_m->offset,
3326 dest_m->object, dest_m->offset,
3327 0);
3328
3329 VM_PAGE_CHECK(src_m);
3330 VM_PAGE_CHECK(dest_m);
3331
3332 /*
3333 * ENCRYPTED SWAP:
3334 * The source page should not be encrypted at this point.
3335 * The destination page will therefore not contain encrypted
3336 * data after the copy.
3337 */
3338 if (src_m->encrypted) {
3339 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3340 }
3341 dest_m->encrypted = FALSE;
3342
3343 if (src_m->object != VM_OBJECT_NULL &&
3344 src_m->object->code_signed) {
3345 /*
3346 * We're copying a page from a code-signed object.
3347 * Whoever ends up mapping the copy page might care about
3348 * the original page's integrity, so let's validate the
3349 * source page now.
3350 */
3351 vm_page_copy_cs_validations++;
3352 vm_page_validate_cs(src_m);
3353 }
3354 /*
3355 * Propagate the cs_tainted bit to the copy page. Do not propagate
3356 * the cs_validated bit.
3357 */
3358 dest_m->cs_tainted = src_m->cs_tainted;
3359 if (dest_m->cs_tainted) {
3360 vm_page_copy_cs_tainted++;
3361 }
3362
3363 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3364 }
3365
3366 #if MACH_ASSERT
3367 static void
3368 _vm_page_print(
3369 vm_page_t p)
3370 {
3371 printf("vm_page %p: \n", p);
3372 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3373 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3374 printf(" next=%p\n", p->next);
3375 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3376 printf(" wire_count=%u\n", p->wire_count);
3377
3378 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3379 (p->local ? "" : "!"),
3380 (p->inactive ? "" : "!"),
3381 (p->active ? "" : "!"),
3382 (p->pageout_queue ? "" : "!"),
3383 (p->speculative ? "" : "!"),
3384 (p->laundry ? "" : "!"));
3385 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3386 (p->free ? "" : "!"),
3387 (p->reference ? "" : "!"),
3388 (p->gobbled ? "" : "!"),
3389 (p->private ? "" : "!"),
3390 (p->throttled ? "" : "!"));
3391 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3392 (p->busy ? "" : "!"),
3393 (p->wanted ? "" : "!"),
3394 (p->tabled ? "" : "!"),
3395 (p->fictitious ? "" : "!"),
3396 (p->pmapped ? "" : "!"),
3397 (p->wpmapped ? "" : "!"));
3398 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3399 (p->pageout ? "" : "!"),
3400 (p->absent ? "" : "!"),
3401 (p->error ? "" : "!"),
3402 (p->dirty ? "" : "!"),
3403 (p->cleaning ? "" : "!"),
3404 (p->precious ? "" : "!"),
3405 (p->clustered ? "" : "!"));
3406 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3407 (p->overwriting ? "" : "!"),
3408 (p->restart ? "" : "!"),
3409 (p->unusual ? "" : "!"),
3410 (p->encrypted ? "" : "!"),
3411 (p->encrypted_cleaning ? "" : "!"));
3412 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3413 (p->list_req_pending ? "" : "!"),
3414 (p->dump_cleaning ? "" : "!"),
3415 (p->cs_validated ? "" : "!"),
3416 (p->cs_tainted ? "" : "!"),
3417 (p->no_cache ? "" : "!"));
3418 printf(" %szero_fill\n",
3419 (p->zero_fill ? "" : "!"));
3420
3421 printf("phys_page=0x%x\n", p->phys_page);
3422 }
3423
3424 /*
3425 * Check that the list of pages is ordered by
3426 * ascending physical address and has no holes.
3427 */
3428 static int
3429 vm_page_verify_contiguous(
3430 vm_page_t pages,
3431 unsigned int npages)
3432 {
3433 register vm_page_t m;
3434 unsigned int page_count;
3435 vm_offset_t prev_addr;
3436
3437 prev_addr = pages->phys_page;
3438 page_count = 1;
3439 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3440 if (m->phys_page != prev_addr + 1) {
3441 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3442 m, (long)prev_addr, m->phys_page);
3443 printf("pages %p page_count %d\n", pages, page_count);
3444 panic("vm_page_verify_contiguous: not contiguous!");
3445 }
3446 prev_addr = m->phys_page;
3447 ++page_count;
3448 }
3449 if (page_count != npages) {
3450 printf("pages %p actual count 0x%x but requested 0x%x\n",
3451 pages, page_count, npages);
3452 panic("vm_page_verify_contiguous: count error");
3453 }
3454 return 1;
3455 }
3456
3457
3458 /*
3459 * Check the free lists for proper length etc.
3460 */
3461 static unsigned int
3462 vm_page_verify_free_list(
3463 queue_head_t *vm_page_queue,
3464 unsigned int color,
3465 vm_page_t look_for_page,
3466 boolean_t expect_page)
3467 {
3468 unsigned int npages;
3469 vm_page_t m;
3470 vm_page_t prev_m;
3471 boolean_t found_page;
3472
3473 found_page = FALSE;
3474 npages = 0;
3475 prev_m = (vm_page_t) vm_page_queue;
3476 queue_iterate(vm_page_queue,
3477 m,
3478 vm_page_t,
3479 pageq) {
3480 if (m == look_for_page) {
3481 found_page = TRUE;
3482 }
3483 if ((vm_page_t) m->pageq.prev != prev_m)
3484 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3485 color, npages, m, m->pageq.prev, prev_m);
3486 if ( ! m->free )
3487 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3488 color, npages, m);
3489 if ( ! m->busy )
3490 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3491 color, npages, m);
3492 if ( color != (unsigned int) -1 && (m->phys_page & vm_color_mask) != color)
3493 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3494 color, npages, m, m->phys_page & vm_color_mask, color);
3495 ++npages;
3496 prev_m = m;
3497 }
3498 if (look_for_page != VM_PAGE_NULL) {
3499 unsigned int other_color;
3500
3501 if (expect_page && !found_page) {
3502 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3503 color, npages, look_for_page, look_for_page->phys_page);
3504 _vm_page_print(look_for_page);
3505 for (other_color = 0;
3506 other_color < vm_colors;
3507 other_color++) {
3508 if (other_color == color)
3509 continue;
3510 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3511 other_color, look_for_page, FALSE);
3512 }
3513 if (color != (unsigned int) -1) {
3514 vm_page_verify_free_list(&vm_lopage_queue_free,
3515 (unsigned int) -1, look_for_page, FALSE);
3516 }
3517
3518 panic("vm_page_verify_free_list(color=%u)\n", color);
3519 }
3520 if (!expect_page && found_page) {
3521 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3522 color, npages, look_for_page, look_for_page->phys_page);
3523 }
3524 }
3525 return npages;
3526 }
3527
3528 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3529 static void
3530 vm_page_verify_free_lists( void )
3531 {
3532 unsigned int color, npages, nlopages;
3533
3534 if (! vm_page_verify_free_lists_enabled)
3535 return;
3536
3537 npages = 0;
3538
3539 lck_mtx_lock(&vm_page_queue_free_lock);
3540
3541 for( color = 0; color < vm_colors; color++ ) {
3542 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3543 color, VM_PAGE_NULL, FALSE);
3544 }
3545
3546 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3547 (unsigned int) -1,
3548 VM_PAGE_NULL, FALSE);
3549 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3550 panic("vm_page_verify_free_lists: "
3551 "npages %u free_count %d nlopages %u lo_free_count %u",
3552 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3553 lck_mtx_unlock(&vm_page_queue_free_lock);
3554 }
3555
3556 void
3557 vm_page_queues_assert(
3558 vm_page_t mem,
3559 int val)
3560 {
3561 if (mem->free + mem->active + mem->inactive + mem->speculative +
3562 mem->throttled + mem->pageout_queue > (val)) {
3563 _vm_page_print(mem);
3564 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3565 }
3566 if (VM_PAGE_WIRED(mem)) {
3567 assert(!mem->active);
3568 assert(!mem->inactive);
3569 assert(!mem->speculative);
3570 assert(!mem->throttled);
3571 }
3572 }
3573 #endif /* MACH_ASSERT */
3574
3575
3576 /*
3577 * CONTIGUOUS PAGE ALLOCATION
3578 *
3579 * Find a region large enough to contain at least n pages
3580 * of contiguous physical memory.
3581 *
3582 * This is done by traversing the vm_page_t array in a linear fashion
3583 * we assume that the vm_page_t array has the available physical pages in an
3584 * ordered, ascending list... this is currently true of all our implementations
3585 * and must remain so... there can be 'holes' in the array... we also can
3586 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3587 * which used to happen via 'vm_page_convert'... that function was no longer
3588 * being called and was removed...
3589 *
3590 * The basic flow consists of stabilizing some of the interesting state of
3591 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3592 * sweep at the beginning of the array looking for pages that meet our criteria
3593 * for a 'stealable' page... currently we are pretty conservative... if the page
3594 * meets this criteria and is physically contiguous to the previous page in the 'run'
3595 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3596 * and start to develop a new run... if at this point we've already considered
3597 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3598 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3599 * to other threads trying to acquire free pages (or move pages from q to q),
3600 * and then continue from the spot we left off... we only make 1 pass through the
3601 * array. Once we have a 'run' that is long enough, we'll go into the loop
3602 * which steals the pages from the queues they're currently on... pages on the free
3603 * queue can be stolen directly... pages that are on any of the other queues
3604 * must be removed from the object they are tabled on... this requires taking the
3605 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3606 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3607 * dump the pages we've currently stolen back to the free list, and pick up our
3608 * scan from the point where we aborted the 'current' run.
3609 *
3610 *
3611 * Requirements:
3612 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3613 *
3614 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3615 *
3616 * Algorithm:
3617 */
3618
3619 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3620
3621
3622 #define RESET_STATE_OF_RUN() \
3623 MACRO_BEGIN \
3624 prevcontaddr = -2; \
3625 start_pnum = -1; \
3626 free_considered = 0; \
3627 substitute_needed = 0; \
3628 npages = 0; \
3629 MACRO_END
3630
3631 /*
3632 * Can we steal in-use (i.e. not free) pages when searching for
3633 * physically-contiguous pages ?
3634 */
3635 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3636
3637 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3638 #if DEBUG
3639 int vm_page_find_contig_debug = 0;
3640 #endif
3641
3642 static vm_page_t
3643 vm_page_find_contiguous(
3644 unsigned int contig_pages,
3645 ppnum_t max_pnum,
3646 ppnum_t pnum_mask,
3647 boolean_t wire,
3648 int flags)
3649 {
3650 vm_page_t m = NULL;
3651 ppnum_t prevcontaddr;
3652 ppnum_t start_pnum;
3653 unsigned int npages, considered, scanned;
3654 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3655 unsigned int idx_last_contig_page_found = 0;
3656 int free_considered, free_available;
3657 int substitute_needed;
3658 boolean_t wrapped;
3659 #if DEBUG
3660 clock_sec_t tv_start_sec, tv_end_sec;
3661 clock_usec_t tv_start_usec, tv_end_usec;
3662 #endif
3663 #if MACH_ASSERT
3664 int yielded = 0;
3665 int dumped_run = 0;
3666 int stolen_pages = 0;
3667 #endif
3668
3669 if (contig_pages == 0)
3670 return VM_PAGE_NULL;
3671
3672 #if MACH_ASSERT
3673 vm_page_verify_free_lists();
3674 #endif
3675 #if DEBUG
3676 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3677 #endif
3678 vm_page_lock_queues();
3679 lck_mtx_lock(&vm_page_queue_free_lock);
3680
3681 RESET_STATE_OF_RUN();
3682
3683 scanned = 0;
3684 considered = 0;
3685 free_available = vm_page_free_count - vm_page_free_reserved;
3686
3687 wrapped = FALSE;
3688
3689 if(flags & KMA_LOMEM)
3690 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3691 else
3692 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3693
3694 orig_last_idx = idx_last_contig_page_found;
3695 last_idx = orig_last_idx;
3696
3697 for (page_idx = last_idx, start_idx = last_idx;
3698 npages < contig_pages && page_idx < vm_pages_count;
3699 page_idx++) {
3700 retry:
3701 if (wrapped &&
3702 npages == 0 &&
3703 page_idx >= orig_last_idx) {
3704 /*
3705 * We're back where we started and we haven't
3706 * found any suitable contiguous range. Let's
3707 * give up.
3708 */
3709 break;
3710 }
3711 scanned++;
3712 m = &vm_pages[page_idx];
3713
3714 assert(!m->fictitious);
3715 assert(!m->private);
3716
3717 if (max_pnum && m->phys_page > max_pnum) {
3718 /* no more low pages... */
3719 break;
3720 }
3721 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3722 /*
3723 * not aligned
3724 */
3725 RESET_STATE_OF_RUN();
3726
3727 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3728 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3729 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3730 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
3731 m->pageout) {
3732 /*
3733 * page is in a transient state
3734 * or a state we don't want to deal
3735 * with, so don't consider it which
3736 * means starting a new run
3737 */
3738 RESET_STATE_OF_RUN();
3739
3740 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3741 /*
3742 * page needs to be on one of our queues
3743 * in order for it to be stable behind the
3744 * locks we hold at this point...
3745 * if not, don't consider it which
3746 * means starting a new run
3747 */
3748 RESET_STATE_OF_RUN();
3749
3750 } else if (!m->free && (!m->tabled || m->busy)) {
3751 /*
3752 * pages on the free list are always 'busy'
3753 * so we couldn't test for 'busy' in the check
3754 * for the transient states... pages that are
3755 * 'free' are never 'tabled', so we also couldn't
3756 * test for 'tabled'. So we check here to make
3757 * sure that a non-free page is not busy and is
3758 * tabled on an object...
3759 * if not, don't consider it which
3760 * means starting a new run
3761 */
3762 RESET_STATE_OF_RUN();
3763
3764 } else {
3765 if (m->phys_page != prevcontaddr + 1) {
3766 if ((m->phys_page & pnum_mask) != 0) {
3767 RESET_STATE_OF_RUN();
3768 goto did_consider;
3769 } else {
3770 npages = 1;
3771 start_idx = page_idx;
3772 start_pnum = m->phys_page;
3773 }
3774 } else {
3775 npages++;
3776 }
3777 prevcontaddr = m->phys_page;
3778
3779 VM_PAGE_CHECK(m);
3780 if (m->free) {
3781 free_considered++;
3782 } else {
3783 /*
3784 * This page is not free.
3785 * If we can't steal used pages,
3786 * we have to give up this run
3787 * and keep looking.
3788 * Otherwise, we might need to
3789 * move the contents of this page
3790 * into a substitute page.
3791 */
3792 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3793 if (m->pmapped || m->dirty) {
3794 substitute_needed++;
3795 }
3796 #else
3797 RESET_STATE_OF_RUN();
3798 #endif
3799 }
3800
3801 if ((free_considered + substitute_needed) > free_available) {
3802 /*
3803 * if we let this run continue
3804 * we will end up dropping the vm_page_free_count
3805 * below the reserve limit... we need to abort
3806 * this run, but we can at least re-consider this
3807 * page... thus the jump back to 'retry'
3808 */
3809 RESET_STATE_OF_RUN();
3810
3811 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3812 considered++;
3813 goto retry;
3814 }
3815 /*
3816 * free_available == 0
3817 * so can't consider any free pages... if
3818 * we went to retry in this case, we'd
3819 * get stuck looking at the same page
3820 * w/o making any forward progress
3821 * we also want to take this path if we've already
3822 * reached our limit that controls the lock latency
3823 */
3824 }
3825 }
3826 did_consider:
3827 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3828
3829 lck_mtx_unlock(&vm_page_queue_free_lock);
3830 vm_page_unlock_queues();
3831
3832 mutex_pause(0);
3833
3834 vm_page_lock_queues();
3835 lck_mtx_lock(&vm_page_queue_free_lock);
3836
3837 RESET_STATE_OF_RUN();
3838 /*
3839 * reset our free page limit since we
3840 * dropped the lock protecting the vm_page_free_queue
3841 */
3842 free_available = vm_page_free_count - vm_page_free_reserved;
3843 considered = 0;
3844 #if MACH_ASSERT
3845 yielded++;
3846 #endif
3847 goto retry;
3848 }
3849 considered++;
3850 }
3851 m = VM_PAGE_NULL;
3852
3853 if (npages != contig_pages) {
3854 if (!wrapped) {
3855 /*
3856 * We didn't find a contiguous range but we didn't
3857 * start from the very first page.
3858 * Start again from the very first page.
3859 */
3860 RESET_STATE_OF_RUN();
3861 if( flags & KMA_LOMEM)
3862 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3863 else
3864 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3865 last_idx = 0;
3866 page_idx = last_idx;
3867 wrapped = TRUE;
3868 goto retry;
3869 }
3870 lck_mtx_unlock(&vm_page_queue_free_lock);
3871 } else {
3872 vm_page_t m1;
3873 vm_page_t m2;
3874 unsigned int cur_idx;
3875 unsigned int tmp_start_idx;
3876 vm_object_t locked_object = VM_OBJECT_NULL;
3877 boolean_t abort_run = FALSE;
3878
3879 assert(page_idx - start_idx == contig_pages);
3880
3881 tmp_start_idx = start_idx;
3882
3883 /*
3884 * first pass through to pull the free pages
3885 * off of the free queue so that in case we
3886 * need substitute pages, we won't grab any
3887 * of the free pages in the run... we'll clear
3888 * the 'free' bit in the 2nd pass, and even in
3889 * an abort_run case, we'll collect all of the
3890 * free pages in this run and return them to the free list
3891 */
3892 while (start_idx < page_idx) {
3893
3894 m1 = &vm_pages[start_idx++];
3895
3896 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3897 assert(m1->free);
3898 #endif
3899
3900 if (m1->free) {
3901 unsigned int color;
3902
3903 color = m1->phys_page & vm_color_mask;
3904 #if MACH_ASSERT
3905 vm_page_verify_free_list(&vm_page_queue_free[color],
3906 color, m1, TRUE);
3907 #endif
3908 queue_remove(&vm_page_queue_free[color],
3909 m1,
3910 vm_page_t,
3911 pageq);
3912 m1->pageq.next = NULL;
3913 m1->pageq.prev = NULL;
3914 #if MACH_ASSERT
3915 vm_page_verify_free_list(&vm_page_queue_free[color],
3916 color, VM_PAGE_NULL, FALSE);
3917 #endif
3918 /*
3919 * Clear the "free" bit so that this page
3920 * does not get considered for another
3921 * concurrent physically-contiguous allocation.
3922 */
3923 m1->free = FALSE;
3924 assert(m1->busy);
3925
3926 vm_page_free_count--;
3927 }
3928 }
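/*
 * At this point every free page in the candidate run has been pulled
 * off its per-color free queue with 'free' cleared (and 'busy' still
 * set), so neither vm_page_grab() nor another contiguous scan can
 * take it out from under us; non-free pages in the run are untouched
 * here and get dealt with in the second pass below.
 */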
3929 /*
3930 * adjust global freelist counts
3931 */
3932 if (vm_page_free_count < vm_page_free_count_minimum)
3933 vm_page_free_count_minimum = vm_page_free_count;
3934
3935 if( flags & KMA_LOMEM)
3936 vm_page_lomem_find_contiguous_last_idx = page_idx;
3937 else
3938 vm_page_find_contiguous_last_idx = page_idx;
3939
3940 /*
3941 * we can drop the free queue lock at this point since
3942 * we've pulled any 'free' candidates off of the list
3943 * we need it dropped so that we can do a vm_page_grab
3944 * when substituting for pmapped/dirty pages
3945 */
3946 lck_mtx_unlock(&vm_page_queue_free_lock);
3947
3948 start_idx = tmp_start_idx;
3949 cur_idx = page_idx - 1;
3950
3951 while (start_idx++ < page_idx) {
3952 /*
3953 * must go through the list from back to front
3954 * so that the page list is created in the
3955 * correct order - low -> high phys addresses
3956 */
3957 m1 = &vm_pages[cur_idx--];
3958
3959 assert(!m1->free);
3960 if (m1->object == VM_OBJECT_NULL) {
3961 /*
3962 * page has already been removed from
3963 * the free list in the 1st pass
3964 */
3965 assert(m1->offset == (vm_object_offset_t) -1);
3966 assert(m1->busy);
3967 assert(!m1->wanted);
3968 assert(!m1->laundry);
3969 } else {
3970 vm_object_t object;
3971
3972 if (abort_run == TRUE)
3973 continue;
3974
3975 object = m1->object;
3976
3977 if (object != locked_object) {
3978 if (locked_object) {
3979 vm_object_unlock(locked_object);
3980 locked_object = VM_OBJECT_NULL;
3981 }
3982 if (vm_object_lock_try(object))
3983 locked_object = object;
3984 }
3985 if (locked_object == VM_OBJECT_NULL ||
3986 (VM_PAGE_WIRED(m1) || m1->gobbled ||
3987 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3988 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3989 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3990
3991 if (locked_object) {
3992 vm_object_unlock(locked_object);
3993 locked_object = VM_OBJECT_NULL;
3994 }
3995 tmp_start_idx = cur_idx;
3996 abort_run = TRUE;
3997 continue;
3998 }
3999 if (m1->pmapped || m1->dirty) {
4000 int refmod;
4001 vm_object_offset_t offset;
4002
4003 m2 = vm_page_grab();
4004
4005 if (m2 == VM_PAGE_NULL) {
4006 if (locked_object) {
4007 vm_object_unlock(locked_object);
4008 locked_object = VM_OBJECT_NULL;
4009 }
4010 tmp_start_idx = cur_idx;
4011 abort_run = TRUE;
4012 continue;
4013 }
4014 if (m1->pmapped)
4015 refmod = pmap_disconnect(m1->phys_page);
4016 else
4017 refmod = 0;
4018 vm_page_copy(m1, m2);
4019
4020 m2->reference = m1->reference;
4021 m2->dirty = m1->dirty;
4022
4023 if (refmod & VM_MEM_REFERENCED)
4024 m2->reference = TRUE;
4025 if (refmod & VM_MEM_MODIFIED)
4026 m2->dirty = TRUE;
4027 offset = m1->offset;
4028
4029 /*
4030 * completely cleans up the state
4031 * of the page so that it is ready
4032 * to be put onto the free list...
4033 * for our purposes it looks like it
4034 * just came off of the free list
4035 */
4036 vm_page_free_prepare(m1);
4037
4038 /*
4039 * make sure we clear the ref/mod state
4040 * from the pmap layer... else we risk
4041 * inheriting state from the last time
4042 * this page was used...
4043 */
4044 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4045 /*
4046 * now put the substitute page on the object
4047 */
4048 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);
4049
4050 if (m2->reference)
4051 vm_page_activate(m2);
4052 else
4053 vm_page_deactivate(m2);
4054
4055 PAGE_WAKEUP_DONE(m2);
4056
4057 } else {
4058 /*
4059 * completely cleans up the state
4060 * of the page so that it is ready
4061 * to be put onto the free list...
4062 * for our purposes it looks like it
4063 * just came off of the free list
4064 */
4065 vm_page_free_prepare(m1);
4066 }
4067 #if MACH_ASSERT
4068 stolen_pages++;
4069 #endif
4070 }
4071 m1->pageq.next = (queue_entry_t) m;
4072 m1->pageq.prev = NULL;
4073 m = m1;
4074 }
4075 if (locked_object) {
4076 vm_object_unlock(locked_object);
4077 locked_object = VM_OBJECT_NULL;
4078 }
4079
4080 if (abort_run == TRUE) {
4081 if (m != VM_PAGE_NULL) {
4082 vm_page_free_list(m, FALSE);
4083 }
4084 #if MACH_ASSERT
4085 dumped_run++;
4086 #endif
4087 /*
4088 * want the index of the last
4089 * page in this run that was
4090 * successfully 'stolen', so back
4091 * it up 1 for the auto-decrement on use
4092 * and 1 more to bump back over this page
4093 */
4094 page_idx = tmp_start_idx + 2;
4095 if (page_idx >= vm_pages_count) {
4096 if (wrapped)
4097 goto done_scanning;
4098 page_idx = last_idx = 0;
4099 wrapped = TRUE;
4100 }
4101 abort_run = FALSE;
4102
4103 /*
4104 * We couldn't complete this run, so reset
4105 * the state of the run and resume the
4106 * scan from the recomputed page_idx
4107 */
4108 RESET_STATE_OF_RUN();
4109
4110 if( flags & KMA_LOMEM)
4111 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4112 else
4113 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4114
4115 last_idx = page_idx;
4116
4117 lck_mtx_lock(&vm_page_queue_free_lock);
4118 /*
4119 * reset our free page limit since we
4120 * dropped the lock protecting the vm_page_free_queue
4121 */
4122 free_available = vm_page_free_count - vm_page_free_reserved;
4123 goto retry;
4124 }
4125
4126 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4127
4128 if (wire == TRUE)
4129 m1->wire_count++;
4130 else
4131 m1->gobbled = TRUE;
4132 }
4133 if (wire == FALSE)
4134 vm_page_gobble_count += npages;
4135
4136 /*
4137 * gobbled pages are also counted as wired pages
4138 */
4139 vm_page_wire_count += npages;
4140
4141 assert(vm_page_verify_contiguous(m, npages));
4142 }
4143 done_scanning:
4144 vm_page_unlock_queues();
4145
4146 #if DEBUG
4147 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4148
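/*
 * compute the elapsed scan time as (tv_end - tv_start), normalizing
 * the result so that 0 <= tv_end_usec < 1000000
 */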
4149 tv_end_sec -= tv_start_sec;
4150 if (tv_end_usec < tv_start_usec) {
4151 tv_end_sec--;
4152 tv_end_usec += 1000000;
4153 }
4154 tv_end_usec -= tv_start_usec;
4155 if (tv_end_usec >= 1000000) {
4156 tv_end_sec++;
4157 tv_end_usec -= 1000000;
4158 }
4159 if (vm_page_find_contig_debug) {
4160 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4161 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4162 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4163 scanned, yielded, dumped_run, stolen_pages);
4164 }
4165
4166 #endif
4167 #if MACH_ASSERT
4168 vm_page_verify_free_lists();
4169 #endif
4170 return m;
4171 }
4172
4173 /*
4174 * Allocate a list of contiguous, wired pages.
4175 */
4176 kern_return_t
4177 cpm_allocate(
4178 vm_size_t size,
4179 vm_page_t *list,
4180 ppnum_t max_pnum,
4181 ppnum_t pnum_mask,
4182 boolean_t wire,
4183 int flags)
4184 {
4185 vm_page_t pages;
4186 unsigned int npages;
4187
4188 if (size % page_size != 0)
4189 return KERN_INVALID_ARGUMENT;
4190
4191 npages = (unsigned int) (size / PAGE_SIZE);
4192 if (npages != size / PAGE_SIZE) {
4193 /* 32-bit overflow */
4194 return KERN_INVALID_ARGUMENT;
4195 }
4196
4197 /*
4198 * Obtain a pointer to a subset of the free
4199 * list large enough to satisfy the request;
4200 * the region will be physically contiguous.
4201 */
4202 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4203
4204 if (pages == VM_PAGE_NULL)
4205 return KERN_NO_SPACE;
4206 /*
4207 * determine need for wakeups
4208 */
4209 if ((vm_page_free_count < vm_page_free_min) ||
4210 ((vm_page_free_count < vm_page_free_target) &&
4211 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4212 thread_wakeup((event_t) &vm_page_free_wanted);
4213
4214 #if CONFIG_EMBEDDED
4215 {
4216 int percent_avail;
4217
4218 /*
4219 * Decide if we need to poke the memorystatus notification thread.
4220 */
4221 percent_avail =
4222 (vm_page_active_count + vm_page_inactive_count +
4223 vm_page_speculative_count + vm_page_free_count +
4224 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
4225 atop_64(max_mem);
4226 if (percent_avail <= (kern_memorystatus_level - 5)) {
4227 kern_memorystatus_level = percent_avail;
4228 thread_wakeup((event_t)&kern_memorystatus_wakeup);
4229 }
4230 }
4231 #endif
4232 /*
4233 * The CPM pages should now be available and
4234 * ordered by ascending physical address.
4235 */
4236 assert(vm_page_verify_contiguous(pages, npages));
4237
4238 *list = pages;
4239 return KERN_SUCCESS;
4240 }
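/*
 * A minimal illustrative sketch (not taken from any in-tree caller) of
 * how cpm_allocate() is used; the size and the zero max_pnum/pnum_mask
 * arguments are assumed values meant as "no physical address bound" and
 * "no alignment constraint":
 *
 *	vm_page_t	pages;
 *	kern_return_t	kr;
 *
 *	kr = cpm_allocate(16 * PAGE_SIZE,	// size, must be a page multiple
 *			  &pages,		// returned page list
 *			  0,			// max_pnum (assumed: no bound)
 *			  0,			// pnum_mask (assumed: no mask)
 *			  TRUE,			// wire the pages
 *			  0);			// flags
 *	if (kr == KERN_SUCCESS) {
 *		// 'pages' is linked through pageq.next (see NEXT_PAGE) in
 *		// ascending physical address order
 *	}
 */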
4241
4242
4243 kern_return_t
4244 vm_page_alloc_list(
4245 int page_count,
4246 int flags,
4247 vm_page_t *list)
4248 {
4249 vm_page_t lo_page_list = VM_PAGE_NULL;
4250 vm_page_t mem;
4251 int i;
4252
4253 if ( !(flags & KMA_LOMEM))
4254 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4255
4256 for (i = 0; i < page_count; i++) {
4257
4258 mem = vm_page_grablo();
4259
4260 if (mem == VM_PAGE_NULL) {
4261 if (lo_page_list)
4262 vm_page_free_list(lo_page_list, FALSE);
4263
4264 *list = VM_PAGE_NULL;
4265
4266 return (KERN_RESOURCE_SHORTAGE);
4267 }
4268 mem->pageq.next = (queue_entry_t) lo_page_list;
4269 lo_page_list = mem;
4270 }
4271 *list = lo_page_list;
4272
4273 return (KERN_SUCCESS);
4274 }
4275
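/*
 * Simple accessors for individual vm_page fields; these let callers that
 * treat vm_page_t as an opaque handle (presumably outside this module)
 * set a page's offset and walk a page list without knowing the structure
 * layout.
 */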
4276 void
4277 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4278 {
4279 page->offset = offset;
4280 }
4281
4282 vm_page_t
4283 vm_page_get_next(vm_page_t page)
4284 {
4285 return ((vm_page_t) page->pageq.next);
4286 }
4287
4288 vm_object_offset_t
4289 vm_page_get_offset(vm_page_t page)
4290 {
4291 return (page->offset);
4292 }
4293
4294 ppnum_t
4295 vm_page_get_phys_page(vm_page_t page)
4296 {
4297 return (page->phys_page);
4298 }
4299
4300
4301 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4302
4303 #if HIBERNATION
4304
4305 static vm_page_t hibernate_gobble_queue;
4306
4307 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4308
4309 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4310 static int hibernate_flush_dirty_pages(void);
4311 static int hibernate_flush_queue(queue_head_t *, int);
4312 static void hibernate_dirty_page(vm_page_t);
4313
4314 void hibernate_flush_wait(void);
4315 void hibernate_mark_in_progress(void);
4316 void hibernate_clear_in_progress(void);
4317
4318
4319 struct hibernate_statistics {
4320 int hibernate_considered;
4321 int hibernate_reentered_on_q;
4322 int hibernate_found_dirty;
4323 int hibernate_skipped_cleaning;
4324 int hibernate_skipped_transient;
4325 int hibernate_skipped_precious;
4326 int hibernate_queue_nolock;
4327 int hibernate_queue_paused;
4328 int hibernate_throttled;
4329 int hibernate_throttle_timeout;
4330 int hibernate_drained;
4331 int hibernate_drain_timeout;
4332 int cd_lock_failed;
4333 int cd_found_precious;
4334 int cd_found_wired;
4335 int cd_found_busy;
4336 int cd_found_unusual;
4337 int cd_found_cleaning;
4338 int cd_found_laundry;
4339 int cd_found_dirty;
4340 int cd_local_free;
4341 int cd_total_free;
4342 int cd_vm_page_wire_count;
4343 int cd_pages;
4344 int cd_discarded;
4345 int cd_count_wire;
4346 } hibernate_stats;
4347
4348
4349
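/*
 * Wait for the given pageout queue's laundry count to drain to zero,
 * blocking in 5 second intervals; returns 1 if a wait times out, 0 once
 * the queue is fully drained.
 */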
4350 static int
4351 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4352 {
4353 wait_result_t wait_result;
4354
4355 vm_page_lock_queues();
4356
4357 while (q->pgo_laundry) {
4358
4359 q->pgo_draining = TRUE;
4360
4361 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4362
4363 vm_page_unlock_queues();
4364
4365 wait_result = thread_block(THREAD_CONTINUE_NULL);
4366
4367 if (wait_result == THREAD_TIMED_OUT) {
4368 hibernate_stats.hibernate_drain_timeout++;
4369 return (1);
4370 }
4371 vm_page_lock_queues();
4372
4373 hibernate_stats.hibernate_drained++;
4374 }
4375 vm_page_unlock_queues();
4376
4377 return (0);
4378 }
4379
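/*
 * Queue a dirty page onto the internal or external pageout queue so that
 * it gets cleaned before the hibernation image is written, doing the same
 * bookkeeping the pageout path expects (laundry bit, pgo_laundry count,
 * pageout_queue bit) and waking the pageout thread if it is idle.
 */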
4380 static void
4381 hibernate_dirty_page(vm_page_t m)
4382 {
4383 vm_object_t object = m->object;
4384 struct vm_pageout_queue *q;
4385
4386 #if DEBUG
4387 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4388 #endif
4389 vm_object_lock_assert_exclusive(object);
4390
4391 /*
4392 * protect the object from collapse -
4393 * locking in the object's paging_offset.
4394 */
4395 vm_object_paging_begin(object);
4396
4397 m->list_req_pending = TRUE;
4398 m->cleaning = TRUE;
4399 m->busy = TRUE;
4400
4401 if (object->internal == TRUE)
4402 q = &vm_pageout_queue_internal;
4403 else
4404 q = &vm_pageout_queue_external;
4405
4406 /*
4407 * pgo_laundry count is tied to the laundry bit
4408 */
4409 m->laundry = TRUE;
4410 q->pgo_laundry++;
4411
4412 m->pageout_queue = TRUE;
4413 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
4414
4415 if (q->pgo_idle == TRUE) {
4416 q->pgo_idle = FALSE;
4417 thread_wakeup((event_t) &q->pgo_pending);
4418 }
4419 }
4420
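/*
 * Walk up to 'qcount' pages of the given paging queue, handing dirty pages
 * to the appropriate pageout queue via hibernate_dirty_page(); pages that
 * can't or shouldn't be cleaned are re-queued, and the walk throttles when
 * the target pageout queue is full.  Returns non-zero if the flush should
 * be abandoned.
 */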
4421 static int
4422 hibernate_flush_queue(queue_head_t *q, int qcount)
4423 {
4424 vm_page_t m;
4425 vm_object_t l_object = NULL;
4426 vm_object_t m_object = NULL;
4427 int refmod_state = 0;
4428 int try_failed_count = 0;
4429 int retval = 0;
4430 int current_run = 0;
4431 struct vm_pageout_queue *iq;
4432 struct vm_pageout_queue *eq;
4433 struct vm_pageout_queue *tq;
4434
4435
4436 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4437
4438 iq = &vm_pageout_queue_internal;
4439 eq = &vm_pageout_queue_external;
4440
4441 vm_page_lock_queues();
4442
4443 while (qcount && !queue_empty(q)) {
4444
4445 if (current_run++ == 1000) {
4446 if (hibernate_should_abort()) {
4447 retval = 1;
4448 break;
4449 }
4450 current_run = 0;
4451 }
4452
4453 m = (vm_page_t) queue_first(q);
4454 m_object = m->object;
4455
4456 /*
4457 * check to see if we currently are working
4458 * with the same object... if so, we've
4459 * already got the lock
4460 */
4461 if (m_object != l_object) {
4462 /*
4463 * the object associated with candidate page is
4464 * different from the one we were just working
4465 * with... dump the lock if we still own it
4466 */
4467 if (l_object != NULL) {
4468 vm_object_unlock(l_object);
4469 l_object = NULL;
4470 }
4471 /*
4472 * Try to lock the object; since we've already got the
4473 * page queues lock, we can only 'try' for this one.
4474 * if the 'try' fails, we need to do a mutex_pause
4475 * to give the owner of the object lock a chance to
4476 * run...
4477 */
4478 if ( !vm_object_lock_try_scan(m_object)) {
4479
4480 if (try_failed_count > 20) {
4481 hibernate_stats.hibernate_queue_nolock++;
4482
4483 goto reenter_pg_on_q;
4484 }
4485 vm_pageout_scan_wants_object = m_object;
4486
4487 vm_page_unlock_queues();
4488 mutex_pause(try_failed_count++);
4489 vm_page_lock_queues();
4490
4491 hibernate_stats.hibernate_queue_paused++;
4492 continue;
4493 } else {
4494 l_object = m_object;
4495 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4496 }
4497 }
4498 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->busy || m->absent || m->error) {
4499 /*
4500 * page is not to be cleaned
4501 * put it back on the head of its queue
4502 */
4503 if (m->cleaning)
4504 hibernate_stats.hibernate_skipped_cleaning++;
4505 else
4506 hibernate_stats.hibernate_skipped_transient++;
4507
4508 goto reenter_pg_on_q;
4509 }
4510 if ( !m_object->pager_initialized && m_object->pager_created)
4511 goto reenter_pg_on_q;
4512
4513 if (m_object->copy == VM_OBJECT_NULL) {
4514 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4515 /*
4516 * let the normal hibernate image path
4517 * deal with these
4518 */
4519 goto reenter_pg_on_q;
4520 }
4521 }
4522 if ( !m->dirty && m->pmapped) {
4523 refmod_state = pmap_get_refmod(m->phys_page);
4524
4525 if ((refmod_state & VM_MEM_MODIFIED))
4526 m->dirty = TRUE;
4527 } else
4528 refmod_state = 0;
4529
4530 if ( !m->dirty) {
4531 /*
4532 * page is not to be cleaned
4533 * put it back on the head of its queue
4534 */
4535 if (m->precious)
4536 hibernate_stats.hibernate_skipped_precious++;
4537
4538 goto reenter_pg_on_q;
4539 }
4540 tq = NULL;
4541
4542 if (m_object->internal) {
4543 if (VM_PAGE_Q_THROTTLED(iq))
4544 tq = iq;
4545 } else if (VM_PAGE_Q_THROTTLED(eq))
4546 tq = eq;
4547
4548 if (tq != NULL) {
4549 wait_result_t wait_result;
4550 int wait_count = 5;
4551
4552 if (l_object != NULL) {
4553 vm_object_unlock(l_object);
4554 l_object = NULL;
4555 }
4556 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4557
4558 tq->pgo_throttled = TRUE;
4559
4560 while (retval == 0) {
4561
4562 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4563
4564 vm_page_unlock_queues();
4565
4566 wait_result = thread_block(THREAD_CONTINUE_NULL);
4567
4568 vm_page_lock_queues();
4569
4570 if (hibernate_should_abort())
4571 retval = 1;
4572
4573 if (wait_result != THREAD_TIMED_OUT)
4574 break;
4575
4576 if (--wait_count == 0) {
4577 hibernate_stats.hibernate_throttle_timeout++;
4578 retval = 1;
4579 }
4580 }
4581 if (retval)
4582 break;
4583
4584 hibernate_stats.hibernate_throttled++;
4585
4586 continue;
4587 }
4588 VM_PAGE_QUEUES_REMOVE(m);
4589
4590 hibernate_dirty_page(m);
4591
4592 hibernate_stats.hibernate_found_dirty++;
4593
4594 goto next_pg;
4595
4596 reenter_pg_on_q:
4597 queue_remove(q, m, vm_page_t, pageq);
4598 queue_enter(q, m, vm_page_t, pageq);
4599
4600 hibernate_stats.hibernate_reentered_on_q++;
4601 next_pg:
4602 hibernate_stats.hibernate_considered++;
4603
4604 qcount--;
4605 try_failed_count = 0;
4606 }
4607 if (l_object != NULL) {
4608 vm_object_unlock(l_object);
4609 l_object = NULL;
4610 }
4611 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4612
4613 vm_page_unlock_queues();
4614
4615 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4616
4617 return (retval);
4618 }
4619
4620
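/*
 * Flush dirty pages from the speculative, active, inactive and zero-fill
 * queues, then drain the internal and external pageout queues; a non-zero
 * return means the flush was aborted or timed out.
 */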
4621 static int
4622 hibernate_flush_dirty_pages()
4623 {
4624 struct vm_speculative_age_q *aq;
4625 uint32_t i;
4626
4627 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4628
4629 if (vm_page_local_q) {
4630 for (i = 0; i < vm_page_local_q_count; i++)
4631 vm_page_reactivate_local(i, TRUE, FALSE);
4632 }
4633
4634 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4635 int qcount;
4636 vm_page_t m;
4637
4638 aq = &vm_page_queue_speculative[i];
4639
4640 if (queue_empty(&aq->age_q))
4641 continue;
4642 qcount = 0;
4643
4644 vm_page_lockspin_queues();
4645
4646 queue_iterate(&aq->age_q,
4647 m,
4648 vm_page_t,
4649 pageq)
4650 {
4651 qcount++;
4652 }
4653 vm_page_unlock_queues();
4654
4655 if (qcount) {
4656 if (hibernate_flush_queue(&aq->age_q, qcount))
4657 return (1);
4658 }
4659 }
4660 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4661 return (1);
4662 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_zf_queue_count))
4663 return (1);
4664 if (hibernate_flush_queue(&vm_page_queue_zf, vm_zf_queue_count))
4665 return (1);
4666
4667 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4668 return (1);
4669 return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4670 }
4671
4672
4673 extern void IOSleep(unsigned int);
4674 extern int sync_internal(void);
4675
4676 int
4677 hibernate_flush_memory()
4678 {
4679 int retval;
4680
4681 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4682
4683 IOSleep(2 * 1000);
4684
4685 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4686
4687 if ((retval = hibernate_flush_dirty_pages()) == 0) {
4688 if (consider_buffer_cache_collect != NULL) {
4689
4690 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4691
4692 sync_internal();
4693 (void)(*consider_buffer_cache_collect)(1);
4694 consider_zone_gc(1);
4695
4696 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4697 }
4698 }
4699 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4700
4701 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4702 hibernate_stats.hibernate_considered,
4703 hibernate_stats.hibernate_reentered_on_q,
4704 hibernate_stats.hibernate_found_dirty);
4705 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4706 hibernate_stats.hibernate_skipped_cleaning,
4707 hibernate_stats.hibernate_skipped_transient,
4708 hibernate_stats.hibernate_skipped_precious,
4709 hibernate_stats.hibernate_queue_nolock);
4710 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4711 hibernate_stats.hibernate_queue_paused,
4712 hibernate_stats.hibernate_throttled,
4713 hibernate_stats.hibernate_throttle_timeout,
4714 hibernate_stats.hibernate_drained,
4715 hibernate_stats.hibernate_drain_timeout);
4716
4717 return (retval);
4718 }
4719
4720 static void
4721 hibernate_page_list_zero(hibernate_page_list_t *list)
4722 {
4723 uint32_t bank;
4724 hibernate_bitmap_t * bitmap;
4725
4726 bitmap = &list->bank_bitmap[0];
4727 for (bank = 0; bank < list->bank_count; bank++)
4728 {
4729 uint32_t last_bit;
4730
4731 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4732 // set out-of-bound bits at end of bitmap.
4733 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4734 if (last_bit)
4735 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4736
4737 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4738 }
4739 }
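/*
 * Worked example (assuming the most-significant-bit-first page indexing
 * used by hibernate_page_bitset): for a bank spanning 40 pages,
 * last_bit = 40 & 31 = 8, so the final word becomes
 * 0xFFFFFFFF >> 8 = 0x00FFFFFF, i.e. the 24 low-order bit positions that
 * correspond to no real page are pre-set ("no need to save") while the 8
 * positions backing real pages remain zero.
 */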
4740
4741 void
4742 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4743 {
4744 uint32_t i;
4745 vm_page_t m;
4746 uint64_t start, end, timeout, nsec;
4747 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4748 clock_get_uptime(&start);
4749
4750 for (i = 0; i < gobble_count; i++)
4751 {
4752 while (VM_PAGE_NULL == (m = vm_page_grab()))
4753 {
4754 clock_get_uptime(&end);
4755 if (end >= timeout)
4756 break;
4757 VM_PAGE_WAIT();
4758 }
4759 if (!m)
4760 break;
4761 m->busy = FALSE;
4762 vm_page_gobble(m);
4763
4764 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4765 hibernate_gobble_queue = m;
4766 }
4767
4768 clock_get_uptime(&end);
4769 absolutetime_to_nanoseconds(end - start, &nsec);
4770 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4771 }
4772
4773 void
4774 hibernate_free_gobble_pages(void)
4775 {
4776 vm_page_t m, next;
4777 uint32_t count = 0;
4778
4779 m = (vm_page_t) hibernate_gobble_queue;
4780 while(m)
4781 {
4782 next = (vm_page_t) m->pageq.next;
4783 vm_page_free(m);
4784 count++;
4785 m = next;
4786 }
4787 hibernate_gobble_queue = VM_PAGE_NULL;
4788
4789 if (count)
4790 HIBLOG("Freed %d pages\n", count);
4791 }
4792
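/*
 * Decide whether a page can be left out of the hibernation image and
 * simply discarded on wakeup: clean pages and pages belonging to volatile
 * or empty purgeable objects qualify; wired, precious, busy, unusual,
 * cleaning and laundry pages do not.
 */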
4793 static boolean_t
4794 hibernate_consider_discard(vm_page_t m)
4795 {
4796 vm_object_t object = NULL;
4797 int refmod_state;
4798 boolean_t discard = FALSE;
4799
4800 do
4801 {
4802 if (m->private)
4803 panic("hibernate_consider_discard: private");
4804
4805 if (!vm_object_lock_try(m->object)) {
4806 hibernate_stats.cd_lock_failed++;
4807 break;
4808 }
4809 object = m->object;
4810
4811 if (VM_PAGE_WIRED(m)) {
4812 hibernate_stats.cd_found_wired++;
4813 break;
4814 }
4815 if (m->precious) {
4816 hibernate_stats.cd_found_precious++;
4817 break;
4818 }
4819 if (m->busy || !object->alive) {
4820 /*
4821 * Somebody is playing with this page.
4822 */
4823 hibernate_stats.cd_found_busy++;
4824 break;
4825 }
4826 if (m->absent || m->unusual || m->error) {
4827 /*
4828 * If it's unusual in any way, ignore it
4829 */
4830 hibernate_stats.cd_found_unusual++;
4831 break;
4832 }
4833 if (m->cleaning) {
4834 hibernate_stats.cd_found_cleaning++;
4835 break;
4836 }
4837 if (m->laundry || m->list_req_pending) {
4838 hibernate_stats.cd_found_laundry++;
4839 break;
4840 }
4841 if (!m->dirty)
4842 {
4843 refmod_state = pmap_get_refmod(m->phys_page);
4844
4845 if (refmod_state & VM_MEM_REFERENCED)
4846 m->reference = TRUE;
4847 if (refmod_state & VM_MEM_MODIFIED)
4848 m->dirty = TRUE;
4849 }
4850
4851 /*
4852 * If it's clean or purgeable we can discard the page on wakeup.
4853 */
4854 discard = (!m->dirty)
4855 || (VM_PURGABLE_VOLATILE == object->purgable)
4856 || (VM_PURGABLE_EMPTY == object->purgable);
4857
4858 if (discard == FALSE)
4859 hibernate_stats.cd_found_dirty++;
4860 }
4861 while (FALSE);
4862
4863 if (object)
4864 vm_object_unlock(object);
4865
4866 return (discard);
4867 }
4868
4869
4870 static void
4871 hibernate_discard_page(vm_page_t m)
4872 {
4873 if (m->absent || m->unusual || m->error)
4874 /*
4875 * If it's unusual in any way, ignore it
4876 */
4877 return;
4878
4879 if (m->pmapped == TRUE)
4880 {
4881 __unused int refmod_state = pmap_disconnect(m->phys_page);
4882 }
4883
4884 if (m->laundry)
4885 panic("hibernate_discard_page(%p) laundry", m);
4886 if (m->private)
4887 panic("hibernate_discard_page(%p) private", m);
4888 if (m->fictitious)
4889 panic("hibernate_discard_page(%p) fictitious", m);
4890
4891 if (VM_PURGABLE_VOLATILE == m->object->purgable)
4892 {
4893 /* object should be on a queue */
4894 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
4895 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
4896 assert(old_queue);
4897 /* No need to lock page queue for token delete, hibernate_vm_unlock()
4898 makes sure these locks are uncontended before sleep */
4899 vm_purgeable_token_delete_first(old_queue);
4900 m->object->purgable = VM_PURGABLE_EMPTY;
4901 }
4902
4903 vm_page_free(m);
4904 }
4905
4906 /*
4907 Bits zero in the bitmaps => the page needs to be saved. All pages default to being saved;
4908 pages known to the VM not to need saving are subtracted.
4909 Wired pages to be saved are present in page_list_wired, pageable ones in page_list.
4910 */
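/*
 * A minimal sketch restating that convention with the existing bitmap
 * helpers (ppnum and m are placeholders):
 *
 *	// mark ppnum as NOT needing to be saved in the image
 *	hibernate_page_bitset(page_list, TRUE, ppnum);
 *
 *	// on wakeup, a set bit means the contents were never saved,
 *	// so the resident page can simply be discarded
 *	if (hibernate_page_bittst(page_list, m->phys_page))
 *		hibernate_discard_page(m);
 */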
4911
4912 void
4913 hibernate_page_list_setall(hibernate_page_list_t * page_list,
4914 hibernate_page_list_t * page_list_wired,
4915 uint32_t * pagesOut)
4916 {
4917 uint64_t start, end, nsec;
4918 vm_page_t m;
4919 uint32_t pages = page_list->page_count;
4920 uint32_t count_zf = 0, count_throttled = 0;
4921 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
4922 uint32_t count_wire = pages;
4923 uint32_t count_discard_active = 0;
4924 uint32_t count_discard_inactive = 0;
4925 uint32_t count_discard_purgeable = 0;
4926 uint32_t count_discard_speculative = 0;
4927 uint32_t i;
4928 uint32_t bank;
4929 hibernate_bitmap_t * bitmap;
4930 hibernate_bitmap_t * bitmap_wired;
4931
4932
4933 HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
4934
4935 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
4936
4937 clock_get_uptime(&start);
4938
4939 hibernate_page_list_zero(page_list);
4940 hibernate_page_list_zero(page_list_wired);
4941
4942 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
4943 hibernate_stats.cd_pages = pages;
4944
4945 if (vm_page_local_q) {
4946 for (i = 0; i < vm_page_local_q_count; i++)
4947 vm_page_reactivate_local(i, TRUE, TRUE);
4948 }
4949
4950 m = (vm_page_t) hibernate_gobble_queue;
4951 while(m)
4952 {
4953 pages--;
4954 count_wire--;
4955 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4956 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4957 m = (vm_page_t) m->pageq.next;
4958 }
4959 #ifndef PPC
4960 for( i = 0; i < real_ncpus; i++ )
4961 {
4962 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
4963 {
4964 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
4965 {
4966 pages--;
4967 count_wire--;
4968 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4969 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4970
4971 hibernate_stats.cd_local_free++;
4972 hibernate_stats.cd_total_free++;
4973 }
4974 }
4975 }
4976 #endif
4977 for( i = 0; i < vm_colors; i++ )
4978 {
4979 queue_iterate(&vm_page_queue_free[i],
4980 m,
4981 vm_page_t,
4982 pageq)
4983 {
4984 pages--;
4985 count_wire--;
4986 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4987 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4988
4989 hibernate_stats.cd_total_free++;
4990 }
4991 }
4992
4993 queue_iterate(&vm_lopage_queue_free,
4994 m,
4995 vm_page_t,
4996 pageq)
4997 {
4998 pages--;
4999 count_wire--;
5000 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5001 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5002
5003 hibernate_stats.cd_total_free++;
5004 }
5005
5006 queue_iterate( &vm_page_queue_throttled,
5007 m,
5008 vm_page_t,
5009 pageq )
5010 {
5011 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5012 && hibernate_consider_discard(m))
5013 {
5014 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5015 count_discard_inactive++;
5016 }
5017 else
5018 count_throttled++;
5019 count_wire--;
5020 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5021 }
5022
5023 queue_iterate( &vm_page_queue_zf,
5024 m,
5025 vm_page_t,
5026 pageq )
5027 {
5028 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5029 && hibernate_consider_discard(m))
5030 {
5031 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5032 if (m->dirty)
5033 count_discard_purgeable++;
5034 else
5035 count_discard_inactive++;
5036 }
5037 else
5038 count_zf++;
5039 count_wire--;
5040 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5041 }
5042
5043 queue_iterate( &vm_page_queue_inactive,
5044 m,
5045 vm_page_t,
5046 pageq )
5047 {
5048 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5049 && hibernate_consider_discard(m))
5050 {
5051 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5052 if (m->dirty)
5053 count_discard_purgeable++;
5054 else
5055 count_discard_inactive++;
5056 }
5057 else
5058 count_inactive++;
5059 count_wire--;
5060 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5061 }
5062
5063 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5064 {
5065 queue_iterate(&vm_page_queue_speculative[i].age_q,
5066 m,
5067 vm_page_t,
5068 pageq)
5069 {
5070 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5071 && hibernate_consider_discard(m))
5072 {
5073 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5074 count_discard_speculative++;
5075 }
5076 else
5077 count_speculative++;
5078 count_wire--;
5079 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5080 }
5081 }
5082
5083 queue_iterate( &vm_page_queue_active,
5084 m,
5085 vm_page_t,
5086 pageq )
5087 {
5088 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5089 && hibernate_consider_discard(m))
5090 {
5091 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5092 if (m->dirty)
5093 count_discard_purgeable++;
5094 else
5095 count_discard_active++;
5096 }
5097 else
5098 count_active++;
5099 count_wire--;
5100 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5101 }
5102
5103 // pull wired from hibernate_bitmap
5104
5105 bitmap = &page_list->bank_bitmap[0];
5106 bitmap_wired = &page_list_wired->bank_bitmap[0];
5107 for (bank = 0; bank < page_list->bank_count; bank++)
5108 {
5109 for (i = 0; i < bitmap->bitmapwords; i++)
5110 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5111 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5112 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5113 }
5114
5115 // machine dependent adjustments
5116 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
5117
5118 hibernate_stats.cd_count_wire = count_wire;
5119 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative;
5120
5121 clock_get_uptime(&end);
5122 absolutetime_to_nanoseconds(end - start, &nsec);
5123 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5124
5125 HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
5126 pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
5127 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5128
5129 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
5130
5131 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5132 }
5133
5134 void
5135 hibernate_page_list_discard(hibernate_page_list_t * page_list)
5136 {
5137 uint64_t start, end, nsec;
5138 vm_page_t m;
5139 vm_page_t next;
5140 uint32_t i;
5141 uint32_t count_discard_active = 0;
5142 uint32_t count_discard_inactive = 0;
5143 uint32_t count_discard_purgeable = 0;
5144 uint32_t count_discard_speculative = 0;
5145
5146 clock_get_uptime(&start);
5147
5148 m = (vm_page_t) queue_first(&vm_page_queue_zf);
5149 while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
5150 {
5151 next = (vm_page_t) m->pageq.next;
5152 if (hibernate_page_bittst(page_list, m->phys_page))
5153 {
5154 if (m->dirty)
5155 count_discard_purgeable++;
5156 else
5157 count_discard_inactive++;
5158 hibernate_discard_page(m);
5159 }
5160 m = next;
5161 }
5162
5163 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5164 {
5165 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5166 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5167 {
5168 next = (vm_page_t) m->pageq.next;
5169 if (hibernate_page_bittst(page_list, m->phys_page))
5170 {
5171 count_discard_speculative++;
5172 hibernate_discard_page(m);
5173 }
5174 m = next;
5175 }
5176 }
5177
5178 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5179 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5180 {
5181 next = (vm_page_t) m->pageq.next;
5182 if (hibernate_page_bittst(page_list, m->phys_page))
5183 {
5184 if (m->dirty)
5185 count_discard_purgeable++;
5186 else
5187 count_discard_inactive++;
5188 hibernate_discard_page(m);
5189 }
5190 m = next;
5191 }
5192
5193 m = (vm_page_t) queue_first(&vm_page_queue_active);
5194 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5195 {
5196 next = (vm_page_t) m->pageq.next;
5197 if (hibernate_page_bittst(page_list, m->phys_page))
5198 {
5199 if (m->dirty)
5200 count_discard_purgeable++;
5201 else
5202 count_discard_active++;
5203 hibernate_discard_page(m);
5204 }
5205 m = next;
5206 }
5207
5208 clock_get_uptime(&end);
5209 absolutetime_to_nanoseconds(end - start, &nsec);
5210 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
5211 nsec / 1000000ULL,
5212 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5213 }
5214
5215 #endif /* HIBERNATION */
5216
5217 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5218
5219 #include <mach_vm_debug.h>
5220 #if MACH_VM_DEBUG
5221
5222 #include <mach_debug/hash_info.h>
5223 #include <vm/vm_debug.h>
5224
5225 /*
5226 * Routine: vm_page_info
5227 * Purpose:
5228 * Return information about the global VP table.
5229 * Fills the buffer with as much information as possible
5230 * and returns the desired size of the buffer.
5231 * Conditions:
5232 * Nothing locked. The caller should provide
5233 * possibly-pageable memory.
5234 */
5235
5236 unsigned int
5237 vm_page_info(
5238 hash_info_bucket_t *info,
5239 unsigned int count)
5240 {
5241 unsigned int i;
5242 lck_spin_t *bucket_lock;
5243
5244 if (vm_page_bucket_count < count)
5245 count = vm_page_bucket_count;
5246
5247 for (i = 0; i < count; i++) {
5248 vm_page_bucket_t *bucket = &vm_page_buckets[i];
5249 unsigned int bucket_count = 0;
5250 vm_page_t m;
5251
5252 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5253 lck_spin_lock(bucket_lock);
5254
5255 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5256 bucket_count++;
5257
5258 lck_spin_unlock(bucket_lock);
5259
5260 /* don't touch pageable memory while holding locks */
5261 info[i].hib_count = bucket_count;
5262 }
5263
5264 return vm_page_bucket_count;
5265 }
5266 #endif /* MACH_VM_DEBUG */
5267
5268 #include <mach_kdb.h>
5269 #if MACH_KDB
5270
5271 #include <ddb/db_output.h>
5272 #include <vm/vm_print.h>
5273 #define printf kdbprintf
5274
5275 /*
5276 * Routine: vm_page_print [exported]
5277 */
5278 void
5279 vm_page_print(
5280 db_addr_t db_addr)
5281 {
5282 vm_page_t p;
5283
5284 p = (vm_page_t) (long) db_addr;
5285
5286 iprintf("page 0x%x\n", p);
5287
5288 db_indent += 2;
5289
5290 iprintf("object=0x%x", p->object);
5291 printf(", offset=0x%x", p->offset);
5292 printf(", wire_count=%d", p->wire_count);
5293
5294 iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
5295 (p->local ? "" : "!"),
5296 (p->inactive ? "" : "!"),
5297 (p->active ? "" : "!"),
5298 (p->throttled ? "" : "!"),
5299 (p->gobbled ? "" : "!"),
5300 (p->laundry ? "" : "!"),
5301 (p->free ? "" : "!"),
5302 (p->reference ? "" : "!"),
5303 (p->encrypted ? "" : "!"));
5304 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
5305 (p->busy ? "" : "!"),
5306 (p->wanted ? "" : "!"),
5307 (p->tabled ? "" : "!"),
5308 (p->fictitious ? "" : "!"),
5309 (p->private ? "" : "!"),
5310 (p->precious ? "" : "!"));
5311 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
5312 (p->absent ? "" : "!"),
5313 (p->error ? "" : "!"),
5314 (p->dirty ? "" : "!"),
5315 (p->cleaning ? "" : "!"),
5316 (p->pageout ? "" : "!"),
5317 (p->clustered ? "" : "!"));
5318 iprintf("%soverwriting, %srestart, %sunusual\n",
5319 (p->overwriting ? "" : "!"),
5320 (p->restart ? "" : "!"),
5321 (p->unusual ? "" : "!"));
5322
5323 iprintf("phys_page=0x%x", p->phys_page);
5324
5325 db_indent -= 2;
5326 }
5327 #endif /* MACH_KDB */