apple/xnu.git: osfmk/vm/vm_resident.c (blob 397914b0c9c683ff305e734b810519476bb4bc98)
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <pexpert/pexpert.h>
89
90 #include <vm/vm_protos.h>
91 #include <vm/memory_object.h>
92 #include <vm/vm_purgeable_internal.h>
93
94 #include <IOKit/IOHibernatePrivate.h>
95
96
97 #include <sys/kern_memorystatus.h>
98
99 #include <sys/kdebug.h>
100
101 boolean_t vm_page_free_verify = TRUE;
102
103 uint32_t vm_lopage_free_count = 0;
104 uint32_t vm_lopage_free_limit = 0;
105 uint32_t vm_lopage_lowater = 0;
106 boolean_t vm_lopage_refill = FALSE;
107 boolean_t vm_lopage_needed = FALSE;
108
109 lck_mtx_ext_t vm_page_queue_lock_ext;
110 lck_mtx_ext_t vm_page_queue_free_lock_ext;
111 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
112
113 int speculative_age_index = 0;
114 int speculative_steal_index = 0;
115 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
116
117
118 __private_extern__ void vm_page_init_lck_grp(void);
119
120 static void vm_page_free_prepare(vm_page_t page);
121 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
122
123
124
125
126 /*
127 * Associated with each page of user-allocatable memory is a
128 * page structure.
129 */
130
131 /*
132 * These variables record the values returned by vm_page_bootstrap,
133 * for debugging purposes. The implementation of pmap_steal_memory
134 * and pmap_startup here also uses them internally.
135 */
136
137 vm_offset_t virtual_space_start;
138 vm_offset_t virtual_space_end;
139 int vm_page_pages;
140
141 /*
142 * The vm_page_lookup() routine, which provides for fast
143 * (virtual memory object, offset) to page lookup, employs
144 * the following hash table. The vm_page_{insert,remove}
145 * routines install and remove associations in the table.
146 * [This table is often called the virtual-to-physical,
147 * or VP, table.]
148 */
149 typedef struct {
150 vm_page_t pages;
151 #if MACH_PAGE_HASH_STATS
152 int cur_count; /* current count */
153 int hi_count; /* high water mark */
154 #endif /* MACH_PAGE_HASH_STATS */
155 } vm_page_bucket_t;
156
157
158 #define BUCKETS_PER_LOCK 16
159
160 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
161 unsigned int vm_page_bucket_count = 0; /* How big is array? */
162 unsigned int vm_page_hash_mask; /* Mask for hash function */
163 unsigned int vm_page_hash_shift; /* Shift for hash function */
164 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
165 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
166
167 lck_spin_t *vm_page_bucket_locks;
168
169
170 #if MACH_PAGE_HASH_STATS
171 /* This routine is only for debug. It is intended to be called by
172 * hand by a developer using a kernel debugger. This routine prints
173 * out vm_page_hash table statistics to the kernel debug console.
174 */
175 void
176 hash_debug(void)
177 {
178 int i;
179 int numbuckets = 0;
180 int highsum = 0;
181 int maxdepth = 0;
182
183 for (i = 0; i < vm_page_bucket_count; i++) {
184 if (vm_page_buckets[i].hi_count) {
185 numbuckets++;
186 highsum += vm_page_buckets[i].hi_count;
187 if (vm_page_buckets[i].hi_count > maxdepth)
188 maxdepth = vm_page_buckets[i].hi_count;
189 }
190 }
191 printf("Total number of buckets: %d\n", vm_page_bucket_count);
192 printf("Number used buckets: %d = %d%%\n",
193 numbuckets, 100*numbuckets/vm_page_bucket_count);
194 printf("Number unused buckets: %d = %d%%\n",
195 vm_page_bucket_count - numbuckets,
196 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
197 printf("Sum of bucket max depth: %d\n", highsum);
198 printf("Average bucket depth: %d.%2d\n",
199 highsum/vm_page_bucket_count,
200 highsum%vm_page_bucket_count);
201 printf("Maximum bucket depth: %d\n", maxdepth);
202 }
203 #endif /* MACH_PAGE_HASH_STATS */
204
205 /*
206 * The virtual page size is currently implemented as a runtime
207 * variable, but is constant once initialized using vm_set_page_size.
208 * This initialization must be done in the machine-dependent
209 * bootstrap sequence, before calling other machine-independent
210 * initializations.
211 *
212 * All references to the virtual page size outside this
213 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
214 * constants.
215 */
216 vm_size_t page_size = PAGE_SIZE;
217 vm_size_t page_mask = PAGE_MASK;
218 int page_shift = PAGE_SHIFT;
219
220 /*
221 * Resident page structures are initialized from
222 * a template (see vm_page_alloc).
223 *
224 * When adding a new field to the virtual memory
225 * object structure, be sure to add initialization
226 * (see vm_page_bootstrap).
227 */
228 struct vm_page vm_page_template;
229
230 vm_page_t vm_pages = VM_PAGE_NULL;
231 unsigned int vm_pages_count = 0;
232 ppnum_t vm_page_lowest = 0;
233
234 /*
235 * Resident pages that represent real memory
236 * are allocated from a set of free lists,
237 * one per color.
238 */
239 unsigned int vm_colors;
240 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
241 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
242 queue_head_t vm_page_queue_free[MAX_COLORS];
243 unsigned int vm_page_free_wanted;
244 unsigned int vm_page_free_wanted_privileged;
245 unsigned int vm_page_free_count;
246 unsigned int vm_page_fictitious_count;
247
248 unsigned int vm_page_free_count_minimum; /* debugging */
249
250 /*
251 * Occasionally, the virtual memory system uses
252 * resident page structures that do not refer to
253 * real pages, for example to leave a page with
254 * important state information in the VP table.
255 *
256 * These page structures are allocated the way
257 * most other kernel structures are.
258 */
259 zone_t vm_page_zone;
260 vm_locks_array_t vm_page_locks;
261 decl_lck_mtx_data(,vm_page_alloc_lock)
262 unsigned int io_throttle_zero_fill;
263
264 unsigned int vm_page_local_q_count = 0;
265 unsigned int vm_page_local_q_soft_limit = 250;
266 unsigned int vm_page_local_q_hard_limit = 500;
267 struct vplq *vm_page_local_q = NULL;
268
269 /*
270 * Fictitious pages don't have a physical address,
271 * but we must initialize phys_page to something.
272 * For debugging, this should be a strange value
273 * that the pmap module can recognize in assertions.
274 */
275 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
276
277 /*
278 * Guard pages are not accessible so they don't
279 * need a physical address, but we need to enter
280 * one in the pmap.
281 * Let's make it recognizable and make sure that
282 * we don't use a real physical page with that
283 * physical address.
284 */
285 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
286
287 /*
288 * Resident page structures are also chained on
289 * queues that are used by the page replacement
290 * system (pageout daemon). These queues are
291 * defined here, but are shared by the pageout
292 * module. The inactive queue is broken into
293 * inactive and zf for convenience as the
294 * pageout daemon often assigns a higher
295 * affinity to zf pages
296 */
297 queue_head_t vm_page_queue_active;
298 queue_head_t vm_page_queue_inactive;
299 queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
300 queue_head_t vm_page_queue_throttled;
301
302 unsigned int vm_page_active_count;
303 unsigned int vm_page_inactive_count;
304 unsigned int vm_page_throttled_count;
305 unsigned int vm_page_speculative_count;
306 unsigned int vm_page_wire_count;
307 unsigned int vm_page_wire_count_initial;
308 unsigned int vm_page_gobble_count = 0;
309 unsigned int vm_page_wire_count_warning = 0;
310 unsigned int vm_page_gobble_count_warning = 0;
311
312 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
313 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
314 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
315
316 #if DEVELOPMENT || DEBUG
317 unsigned int vm_page_speculative_recreated = 0;
318 unsigned int vm_page_speculative_created = 0;
319 unsigned int vm_page_speculative_used = 0;
320 #endif
321
322 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
323 ppnum_t max_valid_low_ppnum = 0xffffffff;
324
325
326 /*
327 * Several page replacement parameters are also
328 * shared with this module, so that page allocation
329 * (done here in vm_page_alloc) can trigger the
330 * pageout daemon.
331 */
332 unsigned int vm_page_free_target = 0;
333 unsigned int vm_page_free_min = 0;
334 unsigned int vm_page_throttle_limit = 0;
335 uint32_t vm_page_creation_throttle = 0;
336 unsigned int vm_page_inactive_target = 0;
337 unsigned int vm_page_inactive_min = 0;
338 unsigned int vm_page_free_reserved = 0;
339 unsigned int vm_page_throttle_count = 0;
340
341 /*
342 * The VM system has a couple of heuristics for deciding
343 * that pages are "uninteresting" and should be placed
344 * on the inactive queue as likely candidates for replacement.
345 * These variables let the heuristics be controlled at run-time
346 * to make experimentation easier.
347 */
348
349 boolean_t vm_page_deactivate_hint = TRUE;
350
351 struct vm_page_stats_reusable vm_page_stats_reusable;
352
353 /*
354 * vm_set_page_size:
355 *
356 * Sets the page size, perhaps based upon the memory
357 * size. Must be called before any use of page-size
358 * dependent functions.
359 *
360 * Sets page_shift and page_mask from page_size.
361 */
362 void
363 vm_set_page_size(void)
364 {
365 page_mask = page_size - 1;
366
367 if ((page_mask & page_size) != 0)
368 panic("vm_set_page_size: page size not a power of two");
369
370 for (page_shift = 0; ; page_shift++)
371 if ((1U << page_shift) == page_size)
372 break;
373 }
374
375
376 /* Called once during startup, once the cache geometry is known.
377 */
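/*
 * Physical pages are binned into vm_colors free queues by
 * (phys_page & vm_color_mask); spreading allocations across colors
 * keeps them distributed over the cache's index sets.
 */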
378 static void
379 vm_page_set_colors( void )
380 {
381 unsigned int n, override;
382
383 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
384 n = override;
385 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
386 n = vm_cache_geometry_colors;
387 else n = DEFAULT_COLORS; /* use default if all else fails */
388
389 if ( n == 0 )
390 n = 1;
391 if ( n > MAX_COLORS )
392 n = MAX_COLORS;
393
394 /* the count must be a power of 2 */
395 if ( ( n & (n - 1)) != 0 )
396 panic("vm_page_set_colors");
397
398 vm_colors = n;
399 vm_color_mask = n - 1;
400 }
401
402
403 lck_grp_t vm_page_lck_grp_free;
404 lck_grp_t vm_page_lck_grp_queue;
405 lck_grp_t vm_page_lck_grp_local;
406 lck_grp_t vm_page_lck_grp_purge;
407 lck_grp_t vm_page_lck_grp_alloc;
408 lck_grp_t vm_page_lck_grp_bucket;
409 lck_grp_attr_t vm_page_lck_grp_attr;
410 lck_attr_t vm_page_lck_attr;
411
412
413 __private_extern__ void
414 vm_page_init_lck_grp(void)
415 {
416 /*
417 * initialize the vm_page lock world
418 */
419 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
420 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
421 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
422 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
423 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
424 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
425 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
426 lck_attr_setdefault(&vm_page_lck_attr);
427 }
428
429 void
430 vm_page_init_local_q()
431 {
432 unsigned int num_cpus;
433 unsigned int i;
434 struct vplq *t_local_q;
435
436 num_cpus = ml_get_max_cpus();
437
438 /*
439 * no point in this for a uni-processor system
440 */
441 if (num_cpus >= 2) {
442 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
443
444 for (i = 0; i < num_cpus; i++) {
445 struct vpl *lq;
446
447 lq = &t_local_q[i].vpl_un.vpl;
448 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
449 queue_init(&lq->vpl_queue);
450 lq->vpl_count = 0;
451 }
452 vm_page_local_q_count = num_cpus;
453
454 vm_page_local_q = (struct vplq *)t_local_q;
455 }
456 }
457
458
459 /*
460 * vm_page_bootstrap:
461 *
462 * Initializes the resident memory module.
463 *
464 * Allocates memory for the page cells, and
465 * for the object/offset-to-page hash table headers.
466 * Each page cell is initialized and placed on the free list.
467 * Returns the range of available kernel virtual memory.
468 */
469
470 void
471 vm_page_bootstrap(
472 vm_offset_t *startp,
473 vm_offset_t *endp)
474 {
475 register vm_page_t m;
476 unsigned int i;
477 unsigned int log1;
478 unsigned int log2;
479 unsigned int size;
480
481 /*
482 * Initialize the vm_page template.
483 */
484
485 m = &vm_page_template;
486 bzero(m, sizeof (*m));
487
488 m->pageq.next = NULL;
489 m->pageq.prev = NULL;
490 m->listq.next = NULL;
491 m->listq.prev = NULL;
492 m->next = VM_PAGE_NULL;
493
494 m->object = VM_OBJECT_NULL; /* reset later */
495 m->offset = (vm_object_offset_t) -1; /* reset later */
496
497 m->wire_count = 0;
498 m->local = FALSE;
499 m->inactive = FALSE;
500 m->active = FALSE;
501 m->pageout_queue = FALSE;
502 m->speculative = FALSE;
503 m->laundry = FALSE;
504 m->free = FALSE;
505 m->reference = FALSE;
506 m->gobbled = FALSE;
507 m->private = FALSE;
508 m->throttled = FALSE;
509 m->__unused_pageq_bits = 0;
510
511 m->phys_page = 0; /* reset later */
512
513 m->busy = TRUE;
514 m->wanted = FALSE;
515 m->tabled = FALSE;
516 m->fictitious = FALSE;
517 m->pmapped = FALSE;
518 m->wpmapped = FALSE;
519 m->pageout = FALSE;
520 m->absent = FALSE;
521 m->error = FALSE;
522 m->dirty = FALSE;
523 m->cleaning = FALSE;
524 m->precious = FALSE;
525 m->clustered = FALSE;
526 m->overwriting = FALSE;
527 m->restart = FALSE;
528 m->unusual = FALSE;
529 m->encrypted = FALSE;
530 m->encrypted_cleaning = FALSE;
531 m->list_req_pending = FALSE;
532 m->dump_cleaning = FALSE;
533 m->cs_validated = FALSE;
534 m->cs_tainted = FALSE;
535 m->no_cache = FALSE;
536 m->zero_fill = FALSE;
537 m->reusable = FALSE;
538 m->slid = FALSE;
539 m->__unused_object_bits = 0;
540
541
542 /*
543 * Initialize the page queues.
544 */
545 vm_page_init_lck_grp();
546
547 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
548 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
549 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
550
551 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
552 int group;
553
554 purgeable_queues[i].token_q_head = 0;
555 purgeable_queues[i].token_q_tail = 0;
556 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
557 queue_init(&purgeable_queues[i].objq[group]);
558
559 purgeable_queues[i].type = i;
560 purgeable_queues[i].new_pages = 0;
561 #if MACH_ASSERT
562 purgeable_queues[i].debug_count_tokens = 0;
563 purgeable_queues[i].debug_count_objects = 0;
564 #endif
565 };
566
567 for (i = 0; i < MAX_COLORS; i++ )
568 queue_init(&vm_page_queue_free[i]);
569
570 queue_init(&vm_lopage_queue_free);
571 queue_init(&vm_page_queue_active);
572 queue_init(&vm_page_queue_inactive);
573 queue_init(&vm_page_queue_throttled);
574 queue_init(&vm_page_queue_zf);
575
576 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
577 queue_init(&vm_page_queue_speculative[i].age_q);
578
579 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
580 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
581 }
582 vm_page_free_wanted = 0;
583 vm_page_free_wanted_privileged = 0;
584
585 vm_page_set_colors();
586
587
588 /*
589 * Steal memory for the map and zone subsystems.
590 */
591
592 vm_map_steal_memory();
593 zone_steal_memory();
594
595 /*
596 * Allocate (and initialize) the virtual-to-physical
597 * table hash buckets.
598 *
599 * The number of buckets should be a power of two to
600 * get a good hash function. The following computation
601 * chooses the first power of two that is greater
602 * than the number of physical pages in the system.
603 */
604
605 if (vm_page_bucket_count == 0) {
606 unsigned int npages = pmap_free_pages();
607
608 vm_page_bucket_count = 1;
609 while (vm_page_bucket_count < npages)
610 vm_page_bucket_count <<= 1;
611 }
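/*
 * One spin lock covers each group of BUCKETS_PER_LOCK consecutive hash
 * buckets; round up so a final partial group also gets a lock.
 */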
612 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
613
614 vm_page_hash_mask = vm_page_bucket_count - 1;
615
616 /*
617 * Calculate object shift value for hashing algorithm:
618 * O = log2(sizeof(struct vm_object))
619 * B = log2(vm_page_bucket_count)
620 * hash shifts the object left by
621 * B/2 - O
622 */
623 size = vm_page_bucket_count;
624 for (log1 = 0; size > 1; log1++)
625 size /= 2;
626 size = sizeof(struct vm_object);
627 for (log2 = 0; size > 1; log2++)
628 size /= 2;
629 vm_page_hash_shift = log1/2 - log2 + 1;
630
631 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
632 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
633 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
634
635 if (vm_page_hash_mask & vm_page_bucket_count)
636 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
637
638 vm_page_buckets = (vm_page_bucket_t *)
639 pmap_steal_memory(vm_page_bucket_count *
640 sizeof(vm_page_bucket_t));
641
642 vm_page_bucket_locks = (lck_spin_t *)
643 pmap_steal_memory(vm_page_bucket_lock_count *
644 sizeof(lck_spin_t));
645
646 for (i = 0; i < vm_page_bucket_count; i++) {
647 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
648
649 bucket->pages = VM_PAGE_NULL;
650 #if MACH_PAGE_HASH_STATS
651 bucket->cur_count = 0;
652 bucket->hi_count = 0;
653 #endif /* MACH_PAGE_HASH_STATS */
654 }
655
656 for (i = 0; i < vm_page_bucket_lock_count; i++)
657 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
658
659 /*
660 * Machine-dependent code allocates the resident page table.
661 * It uses vm_page_init to initialize the page frames.
662 * The code also returns to us the virtual space available
663 * to the kernel. We don't trust the pmap module
664 * to get the alignment right.
665 */
666
667 pmap_startup(&virtual_space_start, &virtual_space_end);
668 virtual_space_start = round_page(virtual_space_start);
669 virtual_space_end = trunc_page(virtual_space_end);
670
671 *startp = virtual_space_start;
672 *endp = virtual_space_end;
673
674 /*
675 * Compute the initial "wire" count.
676 * Up until now, the pages which have been set aside are not under
677 * the VM system's control, so although they aren't explicitly
678 * wired, they nonetheless can't be moved. At this moment,
679 * all VM managed pages are "free", courtesy of pmap_startup.
680 */
681 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
682 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
683 vm_page_wire_count_initial = vm_page_wire_count;
684 vm_page_free_count_minimum = vm_page_free_count;
685
686 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
687 vm_page_free_count, vm_page_wire_count);
688
689 simple_lock_init(&vm_paging_lock, 0);
690 }
691
692 #ifndef MACHINE_PAGES
693 /*
694 * We implement pmap_steal_memory and pmap_startup with the help
695 * of two simpler functions, pmap_virtual_space and pmap_next_page.
696 */
697
698 void *
699 pmap_steal_memory(
700 vm_size_t size)
701 {
702 vm_offset_t addr, vaddr;
703 ppnum_t phys_page;
704
705 /*
706 * Round the size up to a multiple of the pointer size.
707 */
708
709 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
710
711 /*
712 * If this is the first call to pmap_steal_memory,
713 * we have to initialize ourself.
714 */
715
716 if (virtual_space_start == virtual_space_end) {
717 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
718
719 /*
720 * The initial values must be aligned properly, and
721 * we don't trust the pmap module to do it right.
722 */
723
724 virtual_space_start = round_page(virtual_space_start);
725 virtual_space_end = trunc_page(virtual_space_end);
726 }
727
728 /*
729 * Allocate virtual memory for this request.
730 */
731
732 addr = virtual_space_start;
733 virtual_space_start += size;
734
735 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
736
737 /*
738 * Allocate and map physical pages to back new virtual pages.
739 */
740
741 for (vaddr = round_page(addr);
742 vaddr < addr + size;
743 vaddr += PAGE_SIZE) {
744
745 if (!pmap_next_page_hi(&phys_page))
746 panic("pmap_steal_memory");
747
748 /*
749 * XXX Logically, these mappings should be wired,
750 * but some pmap modules barf if they are.
751 */
752 #if defined(__LP64__)
753 pmap_pre_expand(kernel_pmap, vaddr);
754 #endif
755
756 pmap_enter(kernel_pmap, vaddr, phys_page,
757 VM_PROT_READ|VM_PROT_WRITE,
758 VM_WIMG_USE_DEFAULT, FALSE);
759 /*
760 * Account for newly stolen memory
761 */
762 vm_page_wire_count++;
763
764 }
765
766 return (void *) addr;
767 }
768
769 void
770 pmap_startup(
771 vm_offset_t *startp,
772 vm_offset_t *endp)
773 {
774 unsigned int i, npages, pages_initialized, fill, fillval;
775 ppnum_t phys_page;
776 addr64_t tmpaddr;
777
778 /*
779 * We calculate how many page frames we will have
780 * and then allocate the page structures in one chunk.
781 */
782
783 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
784 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
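/*
 * Each page frame costs PAGE_SIZE bytes of memory plus one vm_page
 * structure, so dividing by their sum leaves room for both the array
 * of vm_page structures and the pages it will describe.
 */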
785 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */
786
787 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
788
789 /*
790 * Initialize the page frames.
791 */
792 for (i = 0, pages_initialized = 0; i < npages; i++) {
793 if (!pmap_next_page(&phys_page))
794 break;
795 if (pages_initialized == 0 || phys_page < vm_page_lowest)
796 vm_page_lowest = phys_page;
797
798 vm_page_init(&vm_pages[i], phys_page, FALSE);
799 vm_page_pages++;
800 pages_initialized++;
801 }
802 vm_pages_count = pages_initialized;
803
804 /*
805 * Check if we want to initialize pages to a known value
806 */
807 fill = 0; /* Assume no fill */
808 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
809
810 // -debug code remove
811 if (2 == vm_himemory_mode) {
812 // free low -> high so high is preferred
813 for (i = 1; i <= pages_initialized; i++) {
814 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
815 vm_page_release(&vm_pages[i - 1]);
816 }
817 }
818 else
819 // debug code remove-
820
821 /*
822 * Release pages in reverse order so that physical pages
823 * initially get allocated in ascending addresses. This keeps
824 * the devices (which must address physical memory) happy if
825 * they require several consecutive pages.
826 */
827 for (i = pages_initialized; i > 0; i--) {
828 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
829 vm_page_release(&vm_pages[i - 1]);
830 }
831
832 #if 0
833 {
834 vm_page_t xx, xxo, xxl;
835 int i, j, k, l;
836
837 j = 0; /* (BRINGUP) */
838 xxl = 0;
839
840 for( i = 0; i < vm_colors; i++ ) {
841 queue_iterate(&vm_page_queue_free[i],
842 xx,
843 vm_page_t,
844 pageq) { /* BRINGUP */
845 j++; /* (BRINGUP) */
846 if(j > vm_page_free_count) { /* (BRINGUP) */
847 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
848 }
849
850 l = vm_page_free_count - j; /* (BRINGUP) */
851 k = 0; /* (BRINGUP) */
852
853 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
854
855 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
856 k++;
857 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
858 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
859 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
860 }
861 }
862
863 xxl = xx;
864 }
865 }
866
867 if(j != vm_page_free_count) { /* (BRINGUP) */
868 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
869 }
870 }
871 #endif
872
873
874 /*
875 * We have to re-align virtual_space_start,
876 * because pmap_steal_memory has been using it.
877 */
878
879 virtual_space_start = round_page(virtual_space_start);
880
881 *startp = virtual_space_start;
882 *endp = virtual_space_end;
883 }
884 #endif /* MACHINE_PAGES */
885
886 /*
887 * Routine: vm_page_module_init
888 * Purpose:
889 * Second initialization pass, to be done after
890 * the basic VM system is ready.
891 */
892 void
893 vm_page_module_init(void)
894 {
895 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
896 0, PAGE_SIZE, "vm pages");
897
898 #if ZONE_DEBUG
899 zone_debug_disable(vm_page_zone);
900 #endif /* ZONE_DEBUG */
901
902 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
903 zone_change(vm_page_zone, Z_EXPAND, FALSE);
904 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
905 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
906
907 /*
908 * Adjust zone statistics to account for the real pages allocated
909 * in vm_page_create(). [Q: is this really what we want?]
910 */
911 vm_page_zone->count += vm_page_pages;
912 vm_page_zone->sum_count += vm_page_pages;
913 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
914
915 lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
916 }
917
918 /*
919 * Routine: vm_page_create
920 * Purpose:
921 * After the VM system is up, machine-dependent code
922 * may stumble across more physical memory. For example,
923 * memory that it was reserving for a frame buffer.
924 * vm_page_create turns this memory into available pages.
925 */
926
927 void
928 vm_page_create(
929 ppnum_t start,
930 ppnum_t end)
931 {
932 ppnum_t phys_page;
933 vm_page_t m;
934
935 for (phys_page = start;
936 phys_page < end;
937 phys_page++) {
938 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
939 == VM_PAGE_NULL)
940 vm_page_more_fictitious();
941
942 m->fictitious = FALSE;
943 pmap_clear_noencrypt(phys_page);
944
945 vm_page_pages++;
946 vm_page_release(m);
947 }
948 }
949
950 /*
951 * vm_page_hash:
952 *
953 * Distributes the object/offset key pair among hash buckets.
954 *
955 * NOTE: The bucket count must be a power of 2
956 */
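/*
 * The object pointer is scattered by multiplying it by
 * vm_page_bucket_hash (an odd constant derived from the table size in
 * vm_page_bootstrap), the page index of the offset is XORed with the
 * same constant and added in, and the result is masked down to the
 * bucket range.
 */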
957 #define vm_page_hash(object, offset) (\
958 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
959 & vm_page_hash_mask)
960
961
962 /*
963 * vm_page_insert: [ internal use only ]
964 *
965 * Inserts the given mem entry into the object/object-page
966 * table and object list.
967 *
968 * The object must be locked.
969 */
970 void
971 vm_page_insert(
972 vm_page_t mem,
973 vm_object_t object,
974 vm_object_offset_t offset)
975 {
976 vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
977 }
978
979 void
980 vm_page_insert_internal(
981 vm_page_t mem,
982 vm_object_t object,
983 vm_object_offset_t offset,
984 boolean_t queues_lock_held,
985 boolean_t insert_in_hash)
986 {
987 vm_page_bucket_t *bucket;
988 lck_spin_t *bucket_lock;
989 int hash_id;
990
991 XPR(XPR_VM_PAGE,
992 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
993 object, offset, mem, 0,0);
994
995 VM_PAGE_CHECK(mem);
996
997 if (object == vm_submap_object) {
998 /* the vm_submap_object is only a placeholder for submaps */
999 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1000 }
1001
1002 vm_object_lock_assert_exclusive(object);
1003 #if DEBUG
1004 lck_mtx_assert(&vm_page_queue_lock,
1005 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1006 : LCK_MTX_ASSERT_NOTOWNED);
1007 #endif /* DEBUG */
1008
1009 if (insert_in_hash == TRUE) {
1010 #if DEBUG
1011 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1012 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1013 "already in (obj=%p,off=0x%llx)",
1014 mem, object, offset, mem->object, mem->offset);
1015 #endif
1016 assert(!object->internal || offset < object->vo_size);
1017
1018 /* only insert "pageout" pages into "pageout" objects,
1019 * and normal pages into normal objects */
1020 assert(object->pageout == mem->pageout);
1021
1022 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1023
1024 /*
1025 * Record the object/offset pair in this page
1026 */
1027
1028 mem->object = object;
1029 mem->offset = offset;
1030
1031 /*
1032 * Insert it into the object_object/offset hash table
1033 */
1034 hash_id = vm_page_hash(object, offset);
1035 bucket = &vm_page_buckets[hash_id];
1036 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1037
1038 lck_spin_lock(bucket_lock);
1039
1040 mem->next = bucket->pages;
1041 bucket->pages = mem;
1042 #if MACH_PAGE_HASH_STATS
1043 if (++bucket->cur_count > bucket->hi_count)
1044 bucket->hi_count = bucket->cur_count;
1045 #endif /* MACH_PAGE_HASH_STATS */
1046
1047 lck_spin_unlock(bucket_lock);
1048 }
1049
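/*
 * If the object specifies non-default WIMG caching, push those cache
 * attributes into the pmap for this page now and remember that we did.
 */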
1050 { unsigned int cache_attr;
1051
1052 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1053
1054 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1055 pmap_set_cache_attributes(mem->phys_page, cache_attr);
1056 object->set_cache_attr = TRUE;
1057 }
1058 }
1059 /*
1060 * Now link into the object's list of backed pages.
1061 */
1062
1063 VM_PAGE_INSERT(mem, object);
1064 mem->tabled = TRUE;
1065
1066 /*
1067 * Show that the object has one more resident page.
1068 */
1069
1070 object->resident_page_count++;
1071 if (VM_PAGE_WIRED(mem)) {
1072 object->wired_page_count++;
1073 }
1074 assert(object->resident_page_count >= object->wired_page_count);
1075
1076 assert(!mem->reusable);
1077
1078 if (object->purgable == VM_PURGABLE_VOLATILE) {
1079 if (VM_PAGE_WIRED(mem)) {
1080 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1081 } else {
1082 OSAddAtomic(1, &vm_page_purgeable_count);
1083 }
1084 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1085 mem->throttled) {
1086 /*
1087 * This page belongs to a purged VM object but hasn't
1088 * been purged (because it was "busy").
1089 * It's in the "throttled" queue and hence not
1090 * visible to vm_pageout_scan(). Move it to a pageable
1091 * queue, so that it can eventually be reclaimed, instead
1092 * of lingering in the "empty" object.
1093 */
1094 if (queues_lock_held == FALSE)
1095 vm_page_lockspin_queues();
1096 vm_page_deactivate(mem);
1097 if (queues_lock_held == FALSE)
1098 vm_page_unlock_queues();
1099 }
1100 }
1101
1102 /*
1103 * vm_page_replace:
1104 *
1105 * Exactly like vm_page_insert, except that we first
1106 * remove any existing page at the given offset in object.
1107 *
1108 * The object must be locked.
1109 */
1110 void
1111 vm_page_replace(
1112 register vm_page_t mem,
1113 register vm_object_t object,
1114 register vm_object_offset_t offset)
1115 {
1116 vm_page_bucket_t *bucket;
1117 vm_page_t found_m = VM_PAGE_NULL;
1118 lck_spin_t *bucket_lock;
1119 int hash_id;
1120
1121 VM_PAGE_CHECK(mem);
1122 vm_object_lock_assert_exclusive(object);
1123 #if DEBUG
1124 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1125 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1126 "already in (obj=%p,off=0x%llx)",
1127 mem, object, offset, mem->object, mem->offset);
1128 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1129 #endif
1130 /*
1131 * Record the object/offset pair in this page
1132 */
1133
1134 mem->object = object;
1135 mem->offset = offset;
1136
1137 /*
1138 * Insert it into the object_object/offset hash table,
1139 * replacing any page that might have been there.
1140 */
1141
1142 hash_id = vm_page_hash(object, offset);
1143 bucket = &vm_page_buckets[hash_id];
1144 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1145
1146 lck_spin_lock(bucket_lock);
1147
1148 if (bucket->pages) {
1149 vm_page_t *mp = &bucket->pages;
1150 vm_page_t m = *mp;
1151
1152 do {
1153 if (m->object == object && m->offset == offset) {
1154 /*
1155 * Remove old page from hash list
1156 */
1157 *mp = m->next;
1158
1159 found_m = m;
1160 break;
1161 }
1162 mp = &m->next;
1163 } while ((m = *mp));
1164
1165 mem->next = bucket->pages;
1166 } else {
1167 mem->next = VM_PAGE_NULL;
1168 }
1169 /*
1170 * insert new page at head of hash list
1171 */
1172 bucket->pages = mem;
1173
1174 lck_spin_unlock(bucket_lock);
1175
1176 if (found_m) {
1177 /*
1178 * there was already a page at the specified
1179 * offset for this object... remove it from
1180 * the object and free it back to the free list
1181 */
1182 vm_page_free_unlocked(found_m, FALSE);
1183 }
1184 vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
1185 }
1186
1187 /*
1188 * vm_page_remove: [ internal use only ]
1189 *
1190 * Removes the given mem entry from the object/offset-page
1191 * table and the object page list.
1192 *
1193 * The object must be locked.
1194 */
1195
1196 void
1197 vm_page_remove(
1198 vm_page_t mem,
1199 boolean_t remove_from_hash)
1200 {
1201 vm_page_bucket_t *bucket;
1202 vm_page_t this;
1203 lck_spin_t *bucket_lock;
1204 int hash_id;
1205
1206 XPR(XPR_VM_PAGE,
1207 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1208 mem->object, mem->offset,
1209 mem, 0,0);
1210
1211 vm_object_lock_assert_exclusive(mem->object);
1212 assert(mem->tabled);
1213 assert(!mem->cleaning);
1214 VM_PAGE_CHECK(mem);
1215
1216 if (remove_from_hash == TRUE) {
1217 /*
1218 * Remove from the object_object/offset hash table
1219 */
1220 hash_id = vm_page_hash(mem->object, mem->offset);
1221 bucket = &vm_page_buckets[hash_id];
1222 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1223
1224 lck_spin_lock(bucket_lock);
1225
1226 if ((this = bucket->pages) == mem) {
1227 /* optimize for common case */
1228
1229 bucket->pages = mem->next;
1230 } else {
1231 vm_page_t *prev;
1232
1233 for (prev = &this->next;
1234 (this = *prev) != mem;
1235 prev = &this->next)
1236 continue;
1237 *prev = this->next;
1238 }
1239 #if MACH_PAGE_HASH_STATS
1240 bucket->cur_count--;
1241 #endif /* MACH_PAGE_HASH_STATS */
1242
1243 lck_spin_unlock(bucket_lock);
1244 }
1245 /*
1246 * Now remove from the object's list of backed pages.
1247 */
1248
1249 VM_PAGE_REMOVE(mem);
1250
1251 /*
1252 * And show that the object has one fewer resident
1253 * page.
1254 */
1255
1256 assert(mem->object->resident_page_count > 0);
1257 mem->object->resident_page_count--;
1258
1259 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1260 if (mem->object->resident_page_count == 0)
1261 vm_object_cache_remove(mem->object);
1262 }
1263
1264 if (VM_PAGE_WIRED(mem)) {
1265 assert(mem->object->wired_page_count > 0);
1266 mem->object->wired_page_count--;
1267 }
1268 assert(mem->object->resident_page_count >=
1269 mem->object->wired_page_count);
1270 if (mem->reusable) {
1271 assert(mem->object->reusable_page_count > 0);
1272 mem->object->reusable_page_count--;
1273 assert(mem->object->reusable_page_count <=
1274 mem->object->resident_page_count);
1275 mem->reusable = FALSE;
1276 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1277 vm_page_stats_reusable.reused_remove++;
1278 } else if (mem->object->all_reusable) {
1279 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1280 vm_page_stats_reusable.reused_remove++;
1281 }
1282
1283 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1284 if (VM_PAGE_WIRED(mem)) {
1285 assert(vm_page_purgeable_wired_count > 0);
1286 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1287 } else {
1288 assert(vm_page_purgeable_count > 0);
1289 OSAddAtomic(-1, &vm_page_purgeable_count);
1290 }
1291 }
1292 if (mem->object->set_cache_attr == TRUE)
1293 pmap_set_cache_attributes(mem->phys_page, 0);
1294
1295 mem->tabled = FALSE;
1296 mem->object = VM_OBJECT_NULL;
1297 mem->offset = (vm_object_offset_t) -1;
1298 }
1299
1300
1301 /*
1302 * vm_page_lookup:
1303 *
1304 * Returns the page associated with the object/offset
1305 * pair specified; if none is found, VM_PAGE_NULL is returned.
1306 *
1307 * The object must be locked. No side effects.
1308 */
1309
1310 unsigned long vm_page_lookup_hint = 0;
1311 unsigned long vm_page_lookup_hint_next = 0;
1312 unsigned long vm_page_lookup_hint_prev = 0;
1313 unsigned long vm_page_lookup_hint_miss = 0;
1314 unsigned long vm_page_lookup_bucket_NULL = 0;
1315 unsigned long vm_page_lookup_miss = 0;
1316
1317
1318 vm_page_t
1319 vm_page_lookup(
1320 vm_object_t object,
1321 vm_object_offset_t offset)
1322 {
1323 vm_page_t mem;
1324 vm_page_bucket_t *bucket;
1325 queue_entry_t qe;
1326 lck_spin_t *bucket_lock;
1327 int hash_id;
1328
1329 vm_object_lock_assert_held(object);
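/*
 * Fast path: try the object's hint page and its immediate neighbors
 * on the memq before falling back to the hash table.
 */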
1330 mem = object->memq_hint;
1331
1332 if (mem != VM_PAGE_NULL) {
1333 assert(mem->object == object);
1334
1335 if (mem->offset == offset) {
1336 vm_page_lookup_hint++;
1337 return mem;
1338 }
1339 qe = queue_next(&mem->listq);
1340
1341 if (! queue_end(&object->memq, qe)) {
1342 vm_page_t next_page;
1343
1344 next_page = (vm_page_t) qe;
1345 assert(next_page->object == object);
1346
1347 if (next_page->offset == offset) {
1348 vm_page_lookup_hint_next++;
1349 object->memq_hint = next_page; /* new hint */
1350 return next_page;
1351 }
1352 }
1353 qe = queue_prev(&mem->listq);
1354
1355 if (! queue_end(&object->memq, qe)) {
1356 vm_page_t prev_page;
1357
1358 prev_page = (vm_page_t) qe;
1359 assert(prev_page->object == object);
1360
1361 if (prev_page->offset == offset) {
1362 vm_page_lookup_hint_prev++;
1363 object->memq_hint = prev_page; /* new hint */
1364 return prev_page;
1365 }
1366 }
1367 }
1368 /*
1369 * Search the hash table for this object/offset pair
1370 */
1371 hash_id = vm_page_hash(object, offset);
1372 bucket = &vm_page_buckets[hash_id];
1373
1374 /*
1375 * since we hold the object lock, we are guaranteed that no
1376 * new pages can be inserted into this object... this in turn
1377 * guarantees that the page we're looking for can't exist
1378 * if the bucket it hashes to is currently NULL even when looked
1379 * at outside the scope of the hash bucket lock... this is a
1380 * really cheap optimization to avoid taking the lock
1381 */
1382 if (bucket->pages == VM_PAGE_NULL) {
1383 vm_page_lookup_bucket_NULL++;
1384
1385 return (VM_PAGE_NULL);
1386 }
1387 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1388
1389 lck_spin_lock(bucket_lock);
1390
1391 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1392 VM_PAGE_CHECK(mem);
1393 if ((mem->object == object) && (mem->offset == offset))
1394 break;
1395 }
1396 lck_spin_unlock(bucket_lock);
1397
1398 if (mem != VM_PAGE_NULL) {
1399 if (object->memq_hint != VM_PAGE_NULL) {
1400 vm_page_lookup_hint_miss++;
1401 }
1402 assert(mem->object == object);
1403 object->memq_hint = mem;
1404 } else
1405 vm_page_lookup_miss++;
1406
1407 return(mem);
1408 }
1409
1410
1411 /*
1412 * vm_page_rename:
1413 *
1414 * Move the given memory entry from its
1415 * current object to the specified target object/offset.
1416 *
1417 * The object must be locked.
1418 */
1419 void
1420 vm_page_rename(
1421 register vm_page_t mem,
1422 register vm_object_t new_object,
1423 vm_object_offset_t new_offset,
1424 boolean_t encrypted_ok)
1425 {
1426 assert(mem->object != new_object);
1427
1428 /*
1429 * ENCRYPTED SWAP:
1430 * The encryption key is based on the page's memory object
1431 * (aka "pager") and paging offset. Moving the page to
1432 * another VM object changes its "pager" and "paging_offset"
1433 * so it has to be decrypted first, or we would lose the key.
1434 *
1435 * One exception is VM object collapsing, where we transfer pages
1436 * from one backing object to its parent object. This operation also
1437 * transfers the paging information, so the <pager,paging_offset> info
1438 * should remain consistent. The caller (vm_object_do_collapse())
1439 * sets "encrypted_ok" in this case.
1440 */
1441 if (!encrypted_ok && mem->encrypted) {
1442 panic("vm_page_rename: page %p is encrypted\n", mem);
1443 }
1444
1445 XPR(XPR_VM_PAGE,
1446 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1447 new_object, new_offset,
1448 mem, 0,0);
1449
1450 /*
1451 * Changes to mem->object require the page lock because
1452 * the pageout daemon uses that lock to get the object.
1453 */
1454 vm_page_lockspin_queues();
1455
1456 vm_page_remove(mem, TRUE);
1457 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);
1458
1459 vm_page_unlock_queues();
1460 }
1461
1462 /*
1463 * vm_page_init:
1464 *
1465 * Initialize the fields in a new page.
1466 * This takes a structure with random values and initializes it
1467 * so that it can be given to vm_page_release or vm_page_insert.
1468 */
1469 void
1470 vm_page_init(
1471 vm_page_t mem,
1472 ppnum_t phys_page,
1473 boolean_t lopage)
1474 {
1475 assert(phys_page);
1476 *mem = vm_page_template;
1477 mem->phys_page = phys_page;
1478 #if 0
1479 /*
1480 * we're leaving this turned off for now... currently pages
1481 * come off the free list and are either immediately dirtied/referenced
1482 * due to zero-fill or COW faults, or are used to read or write files...
1483 * in the file I/O case, the UPL mechanism takes care of clearing
1484 * the state of the HW ref/mod bits in a somewhat fragile way.
1485 * Since we may change the way this works in the future (to toughen it up),
1486 * I'm leaving this as a reminder of where these bits could get cleared
1487 */
1488
1489 /*
1490 * make sure both the h/w referenced and modified bits are
1491 * clear at this point... we are especially dependent on
1492 * not finding a 'stale' h/w modified in a number of spots
1493 * once this page goes back into use
1494 */
1495 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1496 #endif
1497 mem->lopage = lopage;
1498 }
1499
1500 /*
1501 * vm_page_grab_fictitious:
1502 *
1503 * Remove a fictitious page from the free list.
1504 * Returns VM_PAGE_NULL if there are no free pages.
1505 */
1506 int c_vm_page_grab_fictitious = 0;
1507 int c_vm_page_grab_fictitious_failed = 0;
1508 int c_vm_page_release_fictitious = 0;
1509 int c_vm_page_more_fictitious = 0;
1510
1511 vm_page_t
1512 vm_page_grab_fictitious_common(
1513 ppnum_t phys_addr)
1514 {
1515 vm_page_t m;
1516
1517 if ((m = (vm_page_t)zget(vm_page_zone))) {
1518
1519 vm_page_init(m, phys_addr, FALSE);
1520 m->fictitious = TRUE;
1521
1522 c_vm_page_grab_fictitious++;
1523 } else
1524 c_vm_page_grab_fictitious_failed++;
1525
1526 return m;
1527 }
1528
1529 vm_page_t
1530 vm_page_grab_fictitious(void)
1531 {
1532 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1533 }
1534
1535 vm_page_t
1536 vm_page_grab_guard(void)
1537 {
1538 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1539 }
1540
1541
1542 /*
1543 * vm_page_release_fictitious:
1544 *
1545 * Release a fictitious page to the zone pool
1546 */
1547 void
1548 vm_page_release_fictitious(
1549 vm_page_t m)
1550 {
1551 assert(!m->free);
1552 assert(m->fictitious);
1553 assert(m->phys_page == vm_page_fictitious_addr ||
1554 m->phys_page == vm_page_guard_addr);
1555
1556 c_vm_page_release_fictitious++;
1557
1558 zfree(vm_page_zone, m);
1559 }
1560
1561 /*
1562 * vm_page_more_fictitious:
1563 *
1564 * Add more fictitious pages to the zone.
1565 * Allowed to block. This routine is way intimate
1566 * with the zones code, for several reasons:
1567 * 1. we need to carve some page structures out of physical
1568 * memory before zones work, so they _cannot_ come from
1569 * the zone_map.
1570 * 2. the zone needs to be collectable in order to prevent
1571 * growth without bound. These structures are used by
1572 * the device pager (by the hundreds and thousands), as
1573 * private pages for pageout, and as blocking pages for
1574 * pagein. Temporary bursts in demand should not result in
1575 * permanent allocation of a resource.
1576 * 3. To smooth allocation humps, we allocate single pages
1577 * with kernel_memory_allocate(), and cram them into the
1578 * zone.
1579 */
1580
1581 void vm_page_more_fictitious(void)
1582 {
1583 vm_offset_t addr;
1584 kern_return_t retval;
1585
1586 c_vm_page_more_fictitious++;
1587
1588 /*
1589 * Allocate a single page from the zone_map. Do not wait if no physical
1590 * pages are immediately available, and do not zero the space. We need
1591 * our own blocking lock here to prevent having multiple,
1592 * simultaneous requests from piling up on the zone_map lock. Exactly
1593 * one (of our) threads should be potentially waiting on the map lock.
1594 * If winner is not vm-privileged, then the page allocation will fail,
1595 * and it will temporarily block here in the vm_page_wait().
1596 */
1597 lck_mtx_lock(&vm_page_alloc_lock);
1598 /*
1599 * If another thread allocated space, just bail out now.
1600 */
1601 if (zone_free_count(vm_page_zone) > 5) {
1602 /*
1603 * The number "5" is a small number that is larger than the
1604 * number of fictitious pages that any single caller will
1605 * attempt to allocate. Otherwise, a thread will attempt to
1606 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1607 * release all of the resources and locks already acquired,
1608 * and then call this routine. This routine finds the pages
1609 * that the caller released, and so fails to allocate new space.
1610 * The process repeats infinitely. The largest known number
1611 * of fictitious pages required in this manner is 2. 5 is
1612 * simply a somewhat larger number.
1613 */
1614 lck_mtx_unlock(&vm_page_alloc_lock);
1615 return;
1616 }
1617
1618 retval = kernel_memory_allocate(zone_map,
1619 &addr, PAGE_SIZE, VM_PROT_ALL,
1620 KMA_KOBJECT|KMA_NOPAGEWAIT);
1621 if (retval != KERN_SUCCESS) {
1622 /*
1623 * No page was available. Drop the
1624 * lock to give another thread a chance at it, and
1625 * wait for the pageout daemon to make progress.
1626 */
1627 lck_mtx_unlock(&vm_page_alloc_lock);
1628 vm_page_wait(THREAD_UNINT);
1629 return;
1630 }
1631 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1632
1633 lck_mtx_unlock(&vm_page_alloc_lock);
1634 }
1635
1636
1637 /*
1638 * vm_pool_low():
1639 *
1640 * Return true if it is not likely that a non-vm_privileged thread
1641 * can get memory without blocking. Advisory only, since the
1642 * situation may change under us.
1643 */
1644 int
1645 vm_pool_low(void)
1646 {
1647 /* No locking, at worst we will fib. */
1648 return( vm_page_free_count <= vm_page_free_reserved );
1649 }
1650
1651
1652
1653 /*
1654 * this is an interface to support bring-up of drivers
1655 * on platforms with physical memory > 4G...
1656 */
1657 int vm_himemory_mode = 0;
1658
1659
1660 /*
1661 * this interface exists to support hardware controllers
1662 * incapable of generating DMAs with more than 32 bits
1663 * of address on platforms with physical memory > 4G...
1664 */
1665 unsigned int vm_lopages_allocated_q = 0;
1666 unsigned int vm_lopages_allocated_cpm_success = 0;
1667 unsigned int vm_lopages_allocated_cpm_failed = 0;
1668 queue_head_t vm_lopage_queue_free;
1669
1670 vm_page_t
1671 vm_page_grablo(void)
1672 {
1673 vm_page_t mem;
1674
1675 if (vm_lopage_needed == FALSE)
1676 return (vm_page_grab());
1677
1678 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1679
1680 if ( !queue_empty(&vm_lopage_queue_free)) {
1681 queue_remove_first(&vm_lopage_queue_free,
1682 mem,
1683 vm_page_t,
1684 pageq);
1685 assert(vm_lopage_free_count);
1686
1687 vm_lopage_free_count--;
1688 vm_lopages_allocated_q++;
1689
1690 if (vm_lopage_free_count < vm_lopage_lowater)
1691 vm_lopage_refill = TRUE;
1692
1693 lck_mtx_unlock(&vm_page_queue_free_lock);
1694 } else {
1695 lck_mtx_unlock(&vm_page_queue_free_lock);
1696
1697 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1698
1699 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1700 vm_lopages_allocated_cpm_failed++;
1701 lck_mtx_unlock(&vm_page_queue_free_lock);
1702
1703 return (VM_PAGE_NULL);
1704 }
1705 mem->busy = TRUE;
1706
1707 vm_page_lockspin_queues();
1708
1709 mem->gobbled = FALSE;
1710 vm_page_gobble_count--;
1711 vm_page_wire_count--;
1712
1713 vm_lopages_allocated_cpm_success++;
1714 vm_page_unlock_queues();
1715 }
1716 assert(mem->busy);
1717 assert(!mem->free);
1718 assert(!mem->pmapped);
1719 assert(!mem->wpmapped);
1720
1721 mem->pageq.next = NULL;
1722 mem->pageq.prev = NULL;
1723
1724 return (mem);
1725 }
1726
1727
1728 /*
1729 * vm_page_grab:
1730 *
1731 * first try to grab a page from the per-cpu free list...
1732 * this must be done while pre-emption is disabled... if
1733 * a page is available, we're done...
1734 * if no page is available, grab the vm_page_queue_free_lock
1735 * and see if current number of free pages would allow us
1736 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1737 * if there are pages available, disable preemption and
1738 * recheck the state of the per-cpu free list... we could
1739 * have been preempted and moved to a different cpu, or
1740 * some other thread could have re-filled it... if still
1741 * empty, figure out how many pages we can steal from the
1742 * global free queue and move to the per-cpu queue...
1743 * return 1 of these pages when done... only wakeup the
1744 * pageout_scan thread if we moved pages from the global
1745 * list... no need for the wakeup if we've satisfied the
1746 * request from the per-cpu queue.
1747 */
1748
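/*
 * When vm_page_grab refills the per-cpu free list, it steals up to
 * COLOR_GROUPS_TO_STEAL pages of each color from the global free
 * queues (fewer if that would dip into the reserved pool).
 */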
1749 #define COLOR_GROUPS_TO_STEAL 4
1750
1751
1752 vm_page_t
1753 vm_page_grab( void )
1754 {
1755 vm_page_t mem;
1756
1757
1758 disable_preemption();
1759
1760 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1761 return_page_from_cpu_list:
1762 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1763 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1764 mem->pageq.next = NULL;
1765
1766 enable_preemption();
1767
1768 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1769 assert(mem->tabled == FALSE);
1770 assert(mem->object == VM_OBJECT_NULL);
1771 assert(!mem->laundry);
1772 assert(!mem->free);
1773 assert(pmap_verify_free(mem->phys_page));
1774 assert(mem->busy);
1775 assert(!mem->encrypted);
1776 assert(!mem->pmapped);
1777 assert(!mem->wpmapped);
1778 assert(!mem->active);
1779 assert(!mem->inactive);
1780 assert(!mem->throttled);
1781 assert(!mem->speculative);
1782
1783 return mem;
1784 }
1785 enable_preemption();
1786
1787
1788 /*
1789 * Optionally produce warnings if the wire or gobble
1790 * counts exceed some threshold.
1791 */
1792 if (vm_page_wire_count_warning > 0
1793 && vm_page_wire_count >= vm_page_wire_count_warning) {
1794 printf("mk: vm_page_grab(): high wired page count of %d\n",
1795 vm_page_wire_count);
1796 assert(vm_page_wire_count < vm_page_wire_count_warning);
1797 }
1798 if (vm_page_gobble_count_warning > 0
1799 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1800 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1801 vm_page_gobble_count);
1802 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1803 }
1804
1805 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1806
1807 /*
1808 * Only let privileged threads (involved in pageout)
1809 * dip into the reserved pool.
1810 */
1811 if ((vm_page_free_count < vm_page_free_reserved) &&
1812 !(current_thread()->options & TH_OPT_VMPRIV)) {
1813 lck_mtx_unlock(&vm_page_queue_free_lock);
1814 mem = VM_PAGE_NULL;
1815 }
1816 else {
1817 vm_page_t head;
1818 vm_page_t tail;
1819 unsigned int pages_to_steal;
1820 unsigned int color;
1821
1822 while ( vm_page_free_count == 0 ) {
1823
1824 lck_mtx_unlock(&vm_page_queue_free_lock);
1825 /*
1826 * must be a privileged thread to be
1827 * in this state since a non-privileged
1828 * thread would have bailed if we were
1829 * under the vm_page_free_reserved mark
1830 */
1831 VM_PAGE_WAIT();
1832 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1833 }
1834
1835 disable_preemption();
1836
1837 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1838 lck_mtx_unlock(&vm_page_queue_free_lock);
1839
1840 /*
1841 * we got preempted and moved to another processor
1842 * or we got preempted and someone else ran and filled the cache
1843 */
1844 goto return_page_from_cpu_list;
1845 }
1846 if (vm_page_free_count <= vm_page_free_reserved)
1847 pages_to_steal = 1;
1848 else {
1849 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1850
1851 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1852 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1853 }
1854 color = PROCESSOR_DATA(current_processor(), start_color);
1855 head = tail = NULL;
1856
1857 while (pages_to_steal--) {
1858 if (--vm_page_free_count < vm_page_free_count_minimum)
1859 vm_page_free_count_minimum = vm_page_free_count;
1860
1861 while (queue_empty(&vm_page_queue_free[color]))
1862 color = (color + 1) & vm_color_mask;
1863
1864 queue_remove_first(&vm_page_queue_free[color],
1865 mem,
1866 vm_page_t,
1867 pageq);
1868 mem->pageq.next = NULL;
1869 mem->pageq.prev = NULL;
1870
1871 assert(!mem->active);
1872 assert(!mem->inactive);
1873 assert(!mem->throttled);
1874 assert(!mem->speculative);
1875
1876 color = (color + 1) & vm_color_mask;
1877
1878 if (head == NULL)
1879 head = mem;
1880 else
1881 tail->pageq.next = (queue_t)mem;
1882 tail = mem;
1883
1884 mem->pageq.prev = NULL;
1885 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1886 assert(mem->tabled == FALSE);
1887 assert(mem->object == VM_OBJECT_NULL);
1888 assert(!mem->laundry);
1889 assert(mem->free);
1890 mem->free = FALSE;
1891
1892 assert(pmap_verify_free(mem->phys_page));
1893 assert(mem->busy);
1894 assert(!mem->free);
1895 assert(!mem->encrypted);
1896 assert(!mem->pmapped);
1897 assert(!mem->wpmapped);
1898 }
1899 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1900 PROCESSOR_DATA(current_processor(), start_color) = color;
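/*
 * Note: the head of the batch satisfies this request just below; the
 * remainder of the chain, parked in the per-processor free_pages cache
 * above, is handed out by later grabs via return_page_from_cpu_list.
 */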
1901
1902 /*
1903 * satisfy this request
1904 */
1905 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1906 mem = head;
1907 mem->pageq.next = NULL;
1908
1909 lck_mtx_unlock(&vm_page_queue_free_lock);
1910
1911 enable_preemption();
1912 }
1913 /*
1914 * Decide if we should poke the pageout daemon.
1915 * We do this if the free count is less than the low
1916 * water mark, or if the free count is less than the high
1917 * water mark (but above the low water mark) and the inactive
1918 * count is less than its target.
1919 *
1920 * We don't have the counts locked ... if they change a little,
1921 * it doesn't really matter.
1922 */
1923 if ((vm_page_free_count < vm_page_free_min) ||
1924 ((vm_page_free_count < vm_page_free_target) &&
1925 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1926 thread_wakeup((event_t) &vm_page_free_wanted);
1927
1928 VM_CHECK_MEMORYSTATUS;
1929
1930 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1931
1932 return mem;
1933 }
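/*
 * Illustrative caller sketch (a hypothetical fragment, mirroring the
 * fallback path in vm_page_part_zero_fill() later in this file): the
 * page returned by vm_page_grab() is busy, unqueued and not yet in any
 * object, and callers that can block typically retry like this:
 *
 *	vm_page_t m;
 *
 *	while ((m = vm_page_grab()) == VM_PAGE_NULL)
 *		vm_page_wait(THREAD_UNINT);
 */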
1934
1935 /*
1936 * vm_page_release:
1937 *
1938 * Return a page to the free list.
1939 */
1940
1941 void
1942 vm_page_release(
1943 register vm_page_t mem)
1944 {
1945 unsigned int color;
1946 int need_wakeup = 0;
1947 int need_priv_wakeup = 0;
1948
1949
1950 assert(!mem->private && !mem->fictitious);
1951 if (vm_page_free_verify) {
1952 assert(pmap_verify_free(mem->phys_page));
1953 }
1954 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1955
1956
1957 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1958 #if DEBUG
1959 if (mem->free)
1960 panic("vm_page_release");
1961 #endif
1962
1963 assert(mem->busy);
1964 assert(!mem->laundry);
1965 assert(mem->object == VM_OBJECT_NULL);
1966 assert(mem->pageq.next == NULL &&
1967 mem->pageq.prev == NULL);
1968 assert(mem->listq.next == NULL &&
1969 mem->listq.prev == NULL);
1970
1971 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
1972 vm_lopage_free_count < vm_lopage_free_limit &&
1973 mem->phys_page < max_valid_low_ppnum) {
1974 /*
1975 * this exists to support hardware controllers
1976 * incapable of generating DMAs with more than 32 bits
1977 * of address on platforms with physical memory > 4G...
1978 */
1979 queue_enter_first(&vm_lopage_queue_free,
1980 mem,
1981 vm_page_t,
1982 pageq);
1983 vm_lopage_free_count++;
1984
1985 if (vm_lopage_free_count >= vm_lopage_free_limit)
1986 vm_lopage_refill = FALSE;
1987
1988 mem->lopage = TRUE;
1989 } else {
1990 mem->lopage = FALSE;
1991 mem->free = TRUE;
1992
1993 color = mem->phys_page & vm_color_mask;
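/*
 * vm_color_mask is vm_colors - 1 (vm_colors is a power of two), so the
 * bucket is just the low bits of the physical page number; e.g., with
 * 32 colors, phys_page 0x12345 lands in bucket 0x12345 & 0x1f == 0x05.
 */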
1994 queue_enter_first(&vm_page_queue_free[color],
1995 mem,
1996 vm_page_t,
1997 pageq);
1998 vm_page_free_count++;
1999 /*
2000 * Check if we should wake up someone waiting for a page.
2001 * But don't bother waking them unless they can allocate.
2002 *
2003 * We wakeup only one thread, to prevent starvation.
2004 * Because the scheduling system handles wait queues FIFO,
2005 * if we wakeup all waiting threads, one greedy thread
2006 * can starve multiple niceguy threads. When the threads
2007 * all wake up, the greedy thread runs first, grabs the page,
2008 * and waits for another page. It will be the first to run
2009 * when the next page is freed.
2010 *
2011 * However, there is a slight danger here.
2012 * The thread we wake might not use the free page.
2013 * Then the other threads could wait indefinitely
2014 * while the page goes unused. To forestall this,
2015 * the pageout daemon will keep making free pages
2016 * as long as vm_page_free_wanted is non-zero.
2017 */
2018
2019 assert(vm_page_free_count > 0);
2020 if (vm_page_free_wanted_privileged > 0) {
2021 vm_page_free_wanted_privileged--;
2022 need_priv_wakeup = 1;
2023 } else if (vm_page_free_wanted > 0 &&
2024 vm_page_free_count > vm_page_free_reserved) {
2025 vm_page_free_wanted--;
2026 need_wakeup = 1;
2027 }
2028 }
2029 lck_mtx_unlock(&vm_page_queue_free_lock);
2030
2031 if (need_priv_wakeup)
2032 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2033 else if (need_wakeup)
2034 thread_wakeup_one((event_t) &vm_page_free_count);
2035
2036 VM_CHECK_MEMORYSTATUS;
2037 }
2038
2039 /*
2040 * vm_page_wait:
2041 *
2042 * Wait for a page to become available.
2043 * If there are plenty of free pages, then we don't sleep.
2044 *
2045 * Returns:
2046 * TRUE: There may be another page, try again
2047 * FALSE: We were interrupted out of our wait, don't try again
2048 */
2049
2050 boolean_t
2051 vm_page_wait(
2052 int interruptible )
2053 {
2054 /*
2055 * We can't use vm_page_free_reserved to make this
2056 * determination. Consider: some thread might
2057 * need to allocate two pages. The first allocation
2058 * succeeds, the second fails. After the first page is freed,
2059 * a call to vm_page_wait must really block.
2060 */
2061 kern_return_t wait_result;
2062 int need_wakeup = 0;
2063 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2064
2065 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2066
2067 if (is_privileged && vm_page_free_count) {
2068 lck_mtx_unlock(&vm_page_queue_free_lock);
2069 return TRUE;
2070 }
2071 if (vm_page_free_count < vm_page_free_target) {
2072
2073 if (is_privileged) {
2074 if (vm_page_free_wanted_privileged++ == 0)
2075 need_wakeup = 1;
2076 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2077 } else {
2078 if (vm_page_free_wanted++ == 0)
2079 need_wakeup = 1;
2080 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2081 }
2082 lck_mtx_unlock(&vm_page_queue_free_lock);
2083 counter(c_vm_page_wait_block++);
2084
2085 if (need_wakeup)
2086 thread_wakeup((event_t)&vm_page_free_wanted);
2087
2088 if (wait_result == THREAD_WAITING)
2089 wait_result = thread_block(THREAD_CONTINUE_NULL);
2090
2091 return(wait_result == THREAD_AWAKENED);
2092 } else {
2093 lck_mtx_unlock(&vm_page_queue_free_lock);
2094 return TRUE;
2095 }
2096 }
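/*
 * Illustrative (hypothetical) caller sketch; the error returned on an
 * interrupted wait is caller-specific, KERN_ABORTED is only an example:
 *
 *	if (vm_page_wait(interruptible) == FALSE)
 *		return KERN_ABORTED;
 *	... otherwise retry the allocation ...
 */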
2097
2098 /*
2099 * vm_page_alloc:
2100 *
2101 * Allocate and return a memory cell associated
2102 * with this VM object/offset pair.
2103 *
2104 * Object must be locked.
2105 */
2106
2107 vm_page_t
2108 vm_page_alloc(
2109 vm_object_t object,
2110 vm_object_offset_t offset)
2111 {
2112 register vm_page_t mem;
2113
2114 vm_object_lock_assert_exclusive(object);
2115 mem = vm_page_grab();
2116 if (mem == VM_PAGE_NULL)
2117 return VM_PAGE_NULL;
2118
2119 vm_page_insert(mem, object, offset);
2120
2121 return(mem);
2122 }
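/*
 * Illustrative sketch of a blocking allocation against a locked object
 * (a hypothetical fragment; "object" and "offset" stand for the
 * caller's own values). The exclusive object lock must be held across
 * the call and dropped before sleeping:
 *
 *	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		VM_PAGE_WAIT();
 *		vm_object_lock(object);
 *	}
 */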
2123
2124 vm_page_t
2125 vm_page_alloclo(
2126 vm_object_t object,
2127 vm_object_offset_t offset)
2128 {
2129 register vm_page_t mem;
2130
2131 vm_object_lock_assert_exclusive(object);
2132 mem = vm_page_grablo();
2133 if (mem == VM_PAGE_NULL)
2134 return VM_PAGE_NULL;
2135
2136 vm_page_insert(mem, object, offset);
2137
2138 return(mem);
2139 }
2140
2141
2142 /*
2143 * vm_page_alloc_guard:
2144 *
2145 * Allocate a fictitious page which will be used
2146 * as a guard page. The page will be inserted into
2147 * the object and returned to the caller.
2148 */
2149
2150 vm_page_t
2151 vm_page_alloc_guard(
2152 vm_object_t object,
2153 vm_object_offset_t offset)
2154 {
2155 register vm_page_t mem;
2156
2157 vm_object_lock_assert_exclusive(object);
2158 mem = vm_page_grab_guard();
2159 if (mem == VM_PAGE_NULL)
2160 return VM_PAGE_NULL;
2161
2162 vm_page_insert(mem, object, offset);
2163
2164 return(mem);
2165 }
2166
2167
2168 counter(unsigned int c_laundry_pages_freed = 0;)
2169
2170 /*
2171 * vm_page_free_prepare:
2172 *
2173 * Removes page from any queue it may be on
2174 * and disassociates it from its VM object.
2175 *
2176 * Object and page queues must be locked prior to entry.
2177 */
2178 static void
2179 vm_page_free_prepare(
2180 vm_page_t mem)
2181 {
2182 vm_page_free_prepare_queues(mem);
2183 vm_page_free_prepare_object(mem, TRUE);
2184 }
2185
2186
2187 void
2188 vm_page_free_prepare_queues(
2189 vm_page_t mem)
2190 {
2191 VM_PAGE_CHECK(mem);
2192 assert(!mem->free);
2193 assert(!mem->cleaning);
2194 assert(!mem->pageout);
2195 #if DEBUG
2196 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2197 if (mem->free)
2198 panic("vm_page_free: freeing page on free list\n");
2199 #endif
2200 if (mem->object) {
2201 vm_object_lock_assert_exclusive(mem->object);
2202 }
2203
2204 if (mem->laundry) {
2205 /*
2206 * We may have to free a page while it's being laundered
2207 * if we lost its pager (due to a forced unmount, for example).
2208 * We need to call vm_pageout_throttle_up() before removing
2209 * the page from its VM object, so that we can find out on
2210 * which pageout queue the page is on.
2211 */
2212 vm_pageout_throttle_up(mem);
2213 counter(++c_laundry_pages_freed);
2214 }
2215 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2216
2217 if (VM_PAGE_WIRED(mem)) {
2218 if (mem->object) {
2219 assert(mem->object->wired_page_count > 0);
2220 mem->object->wired_page_count--;
2221 assert(mem->object->resident_page_count >=
2222 mem->object->wired_page_count);
2223
2224 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2225 OSAddAtomic(+1, &vm_page_purgeable_count);
2226 assert(vm_page_purgeable_wired_count > 0);
2227 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2228 }
2229 }
2230 if (!mem->private && !mem->fictitious)
2231 vm_page_wire_count--;
2232 mem->wire_count = 0;
2233 assert(!mem->gobbled);
2234 } else if (mem->gobbled) {
2235 if (!mem->private && !mem->fictitious)
2236 vm_page_wire_count--;
2237 vm_page_gobble_count--;
2238 }
2239 }
2240
2241
2242 void
2243 vm_page_free_prepare_object(
2244 vm_page_t mem,
2245 boolean_t remove_from_hash)
2246 {
2247 if (mem->tabled)
2248 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2249
2250 PAGE_WAKEUP(mem); /* clears wanted */
2251
2252 if (mem->private) {
2253 mem->private = FALSE;
2254 mem->fictitious = TRUE;
2255 mem->phys_page = vm_page_fictitious_addr;
2256 }
2257 if ( !mem->fictitious) {
2258 if (mem->zero_fill == TRUE)
2259 VM_ZF_COUNT_DECR();
2260 vm_page_init(mem, mem->phys_page, mem->lopage);
2261 }
2262 }
2263
2264
2265 /*
2266 * vm_page_free:
2267 *
2268 * Returns the given page to the free list,
2269 * disassociating it with any VM object.
2270 *
2271 * Object and page queues must be locked prior to entry.
2272 */
2273 void
2274 vm_page_free(
2275 vm_page_t mem)
2276 {
2277 vm_page_free_prepare(mem);
2278
2279 if (mem->fictitious) {
2280 vm_page_release_fictitious(mem);
2281 } else {
2282 vm_page_release(mem);
2283 }
2284 }
2285
2286
2287 void
2288 vm_page_free_unlocked(
2289 vm_page_t mem,
2290 boolean_t remove_from_hash)
2291 {
2292 vm_page_lockspin_queues();
2293 vm_page_free_prepare_queues(mem);
2294 vm_page_unlock_queues();
2295
2296 vm_page_free_prepare_object(mem, remove_from_hash);
2297
2298 if (mem->fictitious) {
2299 vm_page_release_fictitious(mem);
2300 } else {
2301 vm_page_release(mem);
2302 }
2303 }
2304
2305 /*
2306 * Free a list of pages. The list can be up to several hundred pages,
2307 * as blocked up by vm_pageout_scan().
2308 * The big win is not having to take the free list lock once
2309 * per page. We sort the incoming pages into n lists, one for
2310 * each color.
2311 */
2312 void
2313 vm_page_free_list(
2314 vm_page_t mem,
2315 boolean_t prepare_object)
2316 {
2317 vm_page_t nxt;
2318 int pg_count = 0;
2319 int color;
2320 int inuse_list_head = -1;
2321
2322 queue_head_t free_list[MAX_COLORS];
2323 int inuse[MAX_COLORS];
2324
2325 for (color = 0; color < (signed) vm_colors; color++) {
2326 queue_init(&free_list[color]);
2327 }
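/*
 * free_list[color] collects this batch's pages per color, while
 * inuse[] / inuse_list_head chain together only the colors that
 * actually receive pages, so the splice loop further down visits just
 * the buckets in use rather than all MAX_COLORS of them.
 */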
2328
2329 while (mem) {
2330 assert(!mem->inactive);
2331 assert(!mem->active);
2332 assert(!mem->throttled);
2333 assert(!mem->free);
2334 assert(!mem->speculative);
2335 assert(!VM_PAGE_WIRED(mem));
2336 assert(mem->pageq.prev == NULL);
2337
2338 nxt = (vm_page_t)(mem->pageq.next);
2339
2340 if (prepare_object == TRUE)
2341 vm_page_free_prepare_object(mem, TRUE);
2342
2343 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2344 assert(pmap_verify_free(mem->phys_page));
2345 }
2346
2347 if (!mem->fictitious) {
2348 assert(mem->busy);
2349 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2350 vm_lopage_free_count < vm_lopage_free_limit &&
2351 mem->phys_page < max_valid_low_ppnum) {
2352 mem->pageq.next = NULL;
2353 vm_page_release(mem);
2354 } else {
2355
2356 /*
2357 * IMPORTANT: we can't set the page "free" here
2358 * because that would make the page eligible for
2359 * a physically-contiguous allocation (see
2360 * vm_page_find_contiguous()) right away (we don't
2361 * hold the vm_page_queue_free lock). That would
2362 * cause trouble because the page is not actually
2363 * in the free queue yet...
2364 */
2365 color = mem->phys_page & vm_color_mask;
2366 if (queue_empty(&free_list[color])) {
2367 inuse[color] = inuse_list_head;
2368 inuse_list_head = color;
2369 }
2370 queue_enter_first(&free_list[color],
2371 mem,
2372 vm_page_t,
2373 pageq);
2374 pg_count++;
2375 }
2376 } else {
2377 assert(mem->phys_page == vm_page_fictitious_addr ||
2378 mem->phys_page == vm_page_guard_addr);
2379 vm_page_release_fictitious(mem);
2380 }
2381 mem = nxt;
2382 }
2383 if (pg_count) {
2384 unsigned int avail_free_count;
2385 unsigned int need_wakeup = 0;
2386 unsigned int need_priv_wakeup = 0;
2387
2388 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2389
2390 color = inuse_list_head;
2391
2392 while( color != -1 ) {
2393 vm_page_t first, last;
2394 vm_page_t first_free;
2395
2396 /*
2397 * Now that we hold the vm_page_queue_free lock,
2398 * it's safe to mark all pages in our local queue
2399 * as "free"...
2400 */
2401 queue_iterate(&free_list[color],
2402 mem,
2403 vm_page_t,
2404 pageq) {
2405 assert(!mem->free);
2406 assert(mem->busy);
2407 mem->free = TRUE;
2408 }
2409
2410 /*
2411 * ... and insert our local queue at the head of
2412 * the global free queue.
2413 */
2414 first = (vm_page_t) queue_first(&free_list[color]);
2415 last = (vm_page_t) queue_last(&free_list[color]);
2416 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2417 if (queue_empty(&vm_page_queue_free[color])) {
2418 queue_last(&vm_page_queue_free[color]) =
2419 (queue_entry_t) last;
2420 } else {
2421 queue_prev(&first_free->pageq) =
2422 (queue_entry_t) last;
2423 }
2424 queue_first(&vm_page_queue_free[color]) =
2425 (queue_entry_t) first;
2426 queue_prev(&first->pageq) =
2427 (queue_entry_t) &vm_page_queue_free[color];
2428 queue_next(&last->pageq) =
2429 (queue_entry_t) first_free;
2430
2431 /* next color */
2432 color = inuse[color];
2433 }
2434
2435 vm_page_free_count += pg_count;
2436 avail_free_count = vm_page_free_count;
2437
2438 if (vm_page_free_wanted_privileged > 0 &&
2439 avail_free_count > 0) {
2440 if (avail_free_count < vm_page_free_wanted_privileged) {
2441 need_priv_wakeup = avail_free_count;
2442 vm_page_free_wanted_privileged -=
2443 avail_free_count;
2444 avail_free_count = 0;
2445 } else {
2446 need_priv_wakeup = vm_page_free_wanted_privileged;
2447 vm_page_free_wanted_privileged = 0;
2448 avail_free_count -=
2449 need_priv_wakeup;
2450 }
2451 }
2452
2453 if (vm_page_free_wanted > 0 &&
2454 avail_free_count > vm_page_free_reserved) {
2455 unsigned int available_pages;
2456
2457 available_pages = (avail_free_count -
2458 vm_page_free_reserved);
2459
2460 if (available_pages >= vm_page_free_wanted) {
2461 need_wakeup = vm_page_free_wanted;
2462 vm_page_free_wanted = 0;
2463 } else {
2464 need_wakeup = available_pages;
2465 vm_page_free_wanted -= available_pages;
2466 }
2467 }
2468 lck_mtx_unlock(&vm_page_queue_free_lock);
2469
2470 if (need_priv_wakeup != 0) {
2471 /*
2472 * There shouldn't be that many VM-privileged threads,
2473 * so let's wake them all up, even if we don't quite
2474 * have enough pages to satisfy them all.
2475 */
2476 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2477 }
2478 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2479 /*
2480 * We don't expect to have any more waiters
2481 * after this, so let's wake them all up at
2482 * once.
2483 */
2484 thread_wakeup((event_t) &vm_page_free_count);
2485 } else for (; need_wakeup != 0; need_wakeup--) {
2486 /*
2487 * Wake up one waiter per page we just released.
2488 */
2489 thread_wakeup_one((event_t) &vm_page_free_count);
2490 }
2491
2492 VM_CHECK_MEMORYSTATUS;
2493 }
2494 }
2495
2496
2497 /*
2498 * vm_page_wire:
2499 *
2500 * Mark this page as wired down by yet
2501 * another map, removing it from paging queues
2502 * as necessary.
2503 *
2504 * The page's object and the page queues must be locked.
2505 */
2506 void
2507 vm_page_wire(
2508 register vm_page_t mem)
2509 {
2510
2511 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2512
2513 VM_PAGE_CHECK(mem);
2514 if (mem->object) {
2515 vm_object_lock_assert_exclusive(mem->object);
2516 } else {
2517 /*
2518 * In theory, the page should be in an object before it
2519 * gets wired, since we need to hold the object lock
2520 * to update some fields in the page structure.
2521 * However, some code (i386 pmap, for example) might want
2522 * to wire a page before it gets inserted into an object.
2523 * That's somewhat OK, as long as nobody else can get to
2524 * that page and update it at the same time.
2525 */
2526 }
2527 #if DEBUG
2528 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2529 #endif
2530 if ( !VM_PAGE_WIRED(mem)) {
2531 VM_PAGE_QUEUES_REMOVE(mem);
2532
2533 if (mem->object) {
2534 mem->object->wired_page_count++;
2535 assert(mem->object->resident_page_count >=
2536 mem->object->wired_page_count);
2537 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2538 assert(vm_page_purgeable_count > 0);
2539 OSAddAtomic(-1, &vm_page_purgeable_count);
2540 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2541 }
2542 if (mem->object->all_reusable) {
2543 /*
2544 * Wired pages are not counted as "re-usable"
2545 * in "all_reusable" VM objects, so nothing
2546 * to do here.
2547 */
2548 } else if (mem->reusable) {
2549 /*
2550 * This page is not "re-usable" when it's
2551 * wired, so adjust its state and the
2552 * accounting.
2553 */
2554 vm_object_reuse_pages(mem->object,
2555 mem->offset,
2556 mem->offset+PAGE_SIZE_64,
2557 FALSE);
2558 }
2559 }
2560 assert(!mem->reusable);
2561
2562 if (!mem->private && !mem->fictitious && !mem->gobbled)
2563 vm_page_wire_count++;
2564 if (mem->gobbled)
2565 vm_page_gobble_count--;
2566 mem->gobbled = FALSE;
2567 if (mem->zero_fill == TRUE) {
2568 mem->zero_fill = FALSE;
2569 VM_ZF_COUNT_DECR();
2570 }
2571
2572 VM_CHECK_MEMORYSTATUS;
2573
2574 /*
2575 * ENCRYPTED SWAP:
2576 * The page could be encrypted, but
2577 * we don't have to decrypt it here
2578 * because we don't guarantee that the
2579 * data is actually valid at this point.
2580 * The page will get decrypted in
2581 * vm_fault_wire() if needed.
2582 */
2583 }
2584 assert(!mem->gobbled);
2585 mem->wire_count++;
2586 VM_PAGE_CHECK(mem);
2587 }
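/*
 * Illustrative (hypothetical) wiring sequence; per the asserts above,
 * both the exclusive object lock and the page-queues lock are held:
 *
 *	vm_object_lock(object);
 *	vm_page_lockspin_queues();
 *	vm_page_wire(m);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */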
2588
2589 /*
2590 * vm_page_gobble:
2591 *
2592 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2593 *
2594 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2595 */
2596 void
2597 vm_page_gobble(
2598 register vm_page_t mem)
2599 {
2600 vm_page_lockspin_queues();
2601 VM_PAGE_CHECK(mem);
2602
2603 assert(!mem->gobbled);
2604 assert( !VM_PAGE_WIRED(mem));
2605
2606 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2607 if (!mem->private && !mem->fictitious)
2608 vm_page_wire_count++;
2609 }
2610 vm_page_gobble_count++;
2611 mem->gobbled = TRUE;
2612 vm_page_unlock_queues();
2613 }
2614
2615 /*
2616 * vm_page_unwire:
2617 *
2618 * Release one wiring of this page, potentially
2619 * enabling it to be paged again.
2620 *
2621 * The page's object and the page queues must be locked.
2622 */
2623 void
2624 vm_page_unwire(
2625 vm_page_t mem,
2626 boolean_t queueit)
2627 {
2628
2629 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2630
2631 VM_PAGE_CHECK(mem);
2632 assert(VM_PAGE_WIRED(mem));
2633 assert(mem->object != VM_OBJECT_NULL);
2634 #if DEBUG
2635 vm_object_lock_assert_exclusive(mem->object);
2636 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2637 #endif
2638 if (--mem->wire_count == 0) {
2639 assert(!mem->private && !mem->fictitious);
2640 vm_page_wire_count--;
2641 assert(mem->object->wired_page_count > 0);
2642 mem->object->wired_page_count--;
2643 assert(mem->object->resident_page_count >=
2644 mem->object->wired_page_count);
2645 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2646 OSAddAtomic(+1, &vm_page_purgeable_count);
2647 assert(vm_page_purgeable_wired_count > 0);
2648 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2649 }
2650 assert(!mem->laundry);
2651 assert(mem->object != kernel_object);
2652 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2653
2654 if (queueit == TRUE) {
2655 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2656 vm_page_deactivate(mem);
2657 } else {
2658 vm_page_activate(mem);
2659 }
2660 }
2661
2662 VM_CHECK_MEMORYSTATUS;
2663
2664 }
2665 VM_PAGE_CHECK(mem);
2666 }
2667
2668 /*
2669 * vm_page_deactivate:
2670 *
2671 * Returns the given page to the inactive list,
2672 * indicating that no physical maps have access
2673 * to this page. [Used by the physical mapping system.]
2674 *
2675 * The page queues must be locked.
2676 */
2677 void
2678 vm_page_deactivate(
2679 vm_page_t m)
2680 {
2681 vm_page_deactivate_internal(m, TRUE);
2682 }
2683
2684
2685 void
2686 vm_page_deactivate_internal(
2687 vm_page_t m,
2688 boolean_t clear_hw_reference)
2689 {
2690
2691 VM_PAGE_CHECK(m);
2692 assert(m->object != kernel_object);
2693 assert(m->phys_page != vm_page_guard_addr);
2694
2695 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2696 #if DEBUG
2697 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2698 #endif
2699 /*
2700 * This page is no longer very interesting. If it was
2701 * interesting (active or inactive/referenced), then we
2702 * clear the reference bit and (re)enter it in the
2703 * inactive queue. Note wired pages should not have
2704 * their reference bit cleared.
2705 */
2706 assert ( !(m->absent && !m->unusual));
2707
2708 if (m->gobbled) { /* can this happen? */
2709 assert( !VM_PAGE_WIRED(m));
2710
2711 if (!m->private && !m->fictitious)
2712 vm_page_wire_count--;
2713 vm_page_gobble_count--;
2714 m->gobbled = FALSE;
2715 }
2716 if (m->private || m->fictitious || (VM_PAGE_WIRED(m)))
2717 return;
2718
2719 if (!m->absent && clear_hw_reference == TRUE)
2720 pmap_clear_reference(m->phys_page);
2721
2722 m->reference = FALSE;
2723 m->no_cache = FALSE;
2724
2725 if (!m->inactive) {
2726 VM_PAGE_QUEUES_REMOVE(m);
2727
2728 assert(!m->laundry);
2729 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2730
2731 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2732 m->dirty && m->object->internal &&
2733 (m->object->purgable == VM_PURGABLE_DENY ||
2734 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2735 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2736 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2737 m->throttled = TRUE;
2738 vm_page_throttled_count++;
2739 } else {
2740 if (m->object->named && m->object->ref_count == 1) {
2741 vm_page_speculate(m, FALSE);
2742 #if DEVELOPMENT || DEBUG
2743 vm_page_speculative_recreated++;
2744 #endif
2745 } else {
2746 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2747 }
2748 }
2749 }
2750 }
2751
2752 /*
2753 * vm_page_activate:
2754 *
2755 * Put the specified page on the active list (if appropriate).
2756 *
2757 * The page queues must be locked.
2758 */
2759
2760 void
2761 vm_page_activate(
2762 register vm_page_t m)
2763 {
2764 VM_PAGE_CHECK(m);
2765 #ifdef FIXME_4778297
2766 assert(m->object != kernel_object);
2767 #endif
2768 assert(m->phys_page != vm_page_guard_addr);
2769 #if DEBUG
2770 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2771 #endif
2772 assert( !(m->absent && !m->unusual));
2773
2774 if (m->gobbled) {
2775 assert( !VM_PAGE_WIRED(m));
2776 if (!m->private && !m->fictitious)
2777 vm_page_wire_count--;
2778 vm_page_gobble_count--;
2779 m->gobbled = FALSE;
2780 }
2781 if (m->private || m->fictitious)
2782 return;
2783
2784 #if DEBUG
2785 if (m->active)
2786 panic("vm_page_activate: already active");
2787 #endif
2788
2789 if (m->speculative) {
2790 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2791 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2792 }
2793
2794 VM_PAGE_QUEUES_REMOVE(m);
2795
2796 if ( !VM_PAGE_WIRED(m)) {
2797 assert(!m->laundry);
2798 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2799 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2800 m->dirty && m->object->internal &&
2801 (m->object->purgable == VM_PURGABLE_DENY ||
2802 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2803 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2804 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2805 m->throttled = TRUE;
2806 vm_page_throttled_count++;
2807 } else {
2808 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2809 m->active = TRUE;
2810 vm_page_active_count++;
2811 }
2812 m->reference = TRUE;
2813 m->no_cache = FALSE;
2814 }
2815 VM_PAGE_CHECK(m);
2816 }
2817
2818
2819 /*
2820 * vm_page_speculate:
2821 *
2822 * Put the specified page on the speculative list (if appropriate).
2823 *
2824 * The page queues must be locked.
2825 */
2826 void
2827 vm_page_speculate(
2828 vm_page_t m,
2829 boolean_t new)
2830 {
2831 struct vm_speculative_age_q *aq;
2832
2833 VM_PAGE_CHECK(m);
2834 assert(m->object != kernel_object);
2835 assert(m->phys_page != vm_page_guard_addr);
2836 #if DEBUG
2837 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2838 #endif
2839 assert( !(m->absent && !m->unusual));
2840
2841 if (m->private || m->fictitious)
2842 return;
2843
2844 VM_PAGE_QUEUES_REMOVE(m);
2845
2846 if ( !VM_PAGE_WIRED(m)) {
2847 mach_timespec_t ts;
2848 clock_sec_t sec;
2849 clock_nsec_t nsec;
2850
2851 clock_get_system_nanotime(&sec, &nsec);
2852 ts.tv_sec = (unsigned int) sec;
2853 ts.tv_nsec = nsec;
2854
2855 if (vm_page_speculative_count == 0) {
2856
2857 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2858 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2859
2860 aq = &vm_page_queue_speculative[speculative_age_index];
2861
2862 /*
2863 * set the timer to begin a new group
2864 */
2865 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2866 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
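/*
 * Illustrative arithmetic only: if vm_page_speculative_q_age_ms were
 * 1500, this would yield tv_sec = 1 and tv_nsec = 500 * 1000 *
 * NSEC_PER_USEC = 500000000, i.e. a 1.5s window, which is then
 * advanced to an absolute deadline by the ADD_MACH_TIMESPEC below.
 */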
2867
2868 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2869 } else {
2870 aq = &vm_page_queue_speculative[speculative_age_index];
2871
2872 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2873
2874 speculative_age_index++;
2875
2876 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2877 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2878 if (speculative_age_index == speculative_steal_index) {
2879 speculative_steal_index = speculative_age_index + 1;
2880
2881 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2882 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2883 }
2884 aq = &vm_page_queue_speculative[speculative_age_index];
2885
2886 if (!queue_empty(&aq->age_q))
2887 vm_page_speculate_ageit(aq);
2888
2889 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2890 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2891
2892 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2893 }
2894 }
2895 enqueue_tail(&aq->age_q, &m->pageq);
2896 m->speculative = TRUE;
2897 vm_page_speculative_count++;
2898
2899 if (new == TRUE) {
2900 vm_object_lock_assert_exclusive(m->object);
2901
2902 m->object->pages_created++;
2903 #if DEVELOPMENT || DEBUG
2904 vm_page_speculative_created++;
2905 #endif
2906 }
2907 }
2908 VM_PAGE_CHECK(m);
2909 }
2910
2911
2912 /*
2913 * move pages from the specified aging bin to
2914 * the speculative bin that pageout_scan claims from
2915 *
2916 * The page queues must be locked.
2917 */
2918 void
2919 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2920 {
2921 struct vm_speculative_age_q *sq;
2922 vm_page_t t;
2923
2924 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2925
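/*
 * If the AGED bin is empty, adopt aq's chain wholesale; otherwise
 * splice aq's chain onto the tail of sq. In both cases the boundary
 * pages' pageq links must be re-pointed at &sq->age_q by hand, since
 * the queue heads are embedded sentinels rather than vm_page_t's.
 */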
2926 if (queue_empty(&sq->age_q)) {
2927 sq->age_q.next = aq->age_q.next;
2928 sq->age_q.prev = aq->age_q.prev;
2929
2930 t = (vm_page_t)sq->age_q.next;
2931 t->pageq.prev = &sq->age_q;
2932
2933 t = (vm_page_t)sq->age_q.prev;
2934 t->pageq.next = &sq->age_q;
2935 } else {
2936 t = (vm_page_t)sq->age_q.prev;
2937 t->pageq.next = aq->age_q.next;
2938
2939 t = (vm_page_t)aq->age_q.next;
2940 t->pageq.prev = sq->age_q.prev;
2941
2942 t = (vm_page_t)aq->age_q.prev;
2943 t->pageq.next = &sq->age_q;
2944
2945 sq->age_q.prev = aq->age_q.prev;
2946 }
2947 queue_init(&aq->age_q);
2948 }
2949
2950
2951 void
2952 vm_page_lru(
2953 vm_page_t m)
2954 {
2955 VM_PAGE_CHECK(m);
2956 assert(m->object != kernel_object);
2957 assert(m->phys_page != vm_page_guard_addr);
2958
2959 #if DEBUG
2960 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2961 #endif
2962 if (m->active || m->reference)
2963 return;
2964
2965 if (m->private || (VM_PAGE_WIRED(m)))
2966 return;
2967
2968 m->no_cache = FALSE;
2969
2970 VM_PAGE_QUEUES_REMOVE(m);
2971
2972 assert(!m->laundry);
2973 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2974
2975 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2976 }
2977
2978
2979 void
2980 vm_page_reactivate_all_throttled(void)
2981 {
2982 vm_page_t first_throttled, last_throttled;
2983 vm_page_t first_active;
2984 vm_page_t m;
2985 int extra_active_count;
2986
2987 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
2988 return;
2989
2990 extra_active_count = 0;
2991 vm_page_lock_queues();
2992 if (! queue_empty(&vm_page_queue_throttled)) {
2993 /*
2994 * Switch "throttled" pages to "active".
2995 */
2996 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
2997 VM_PAGE_CHECK(m);
2998 assert(m->throttled);
2999 assert(!m->active);
3000 assert(!m->inactive);
3001 assert(!m->speculative);
3002 assert(!VM_PAGE_WIRED(m));
3003
3004 extra_active_count++;
3005
3006 m->throttled = FALSE;
3007 m->active = TRUE;
3008 VM_PAGE_CHECK(m);
3009 }
3010
3011 /*
3012 * Transfer the entire throttled queue to the regular LRU page queues.
3013 * We insert it at the head of the active queue, so that these pages
3014 * get re-evaluated by the LRU algorithm first, since they've been
3015 * completely out of it until now.
3016 */
3017 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3018 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3019 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3020 if (queue_empty(&vm_page_queue_active)) {
3021 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3022 } else {
3023 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3024 }
3025 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3026 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3027 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3028
3029 #if DEBUG
3030 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3031 #endif
3032 queue_init(&vm_page_queue_throttled);
3033 /*
3034 * Adjust the global page counts.
3035 */
3036 vm_page_active_count += extra_active_count;
3037 vm_page_throttled_count = 0;
3038 }
3039 assert(vm_page_throttled_count == 0);
3040 assert(queue_empty(&vm_page_queue_throttled));
3041 vm_page_unlock_queues();
3042 }
3043
3044
3045 /*
3046 * move pages from the indicated local queue to the global active queue
3047 * it's OK to fail if we're below the hard limit and force == FALSE
3048 * the nolocks == TRUE case is to allow this function to be run on
3049 * the hibernate path
3050 */
3051
3052 void
3053 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3054 {
3055 struct vpl *lq;
3056 vm_page_t first_local, last_local;
3057 vm_page_t first_active;
3058 vm_page_t m;
3059 uint32_t count = 0;
3060
3061 if (vm_page_local_q == NULL)
3062 return;
3063
3064 lq = &vm_page_local_q[lid].vpl_un.vpl;
3065
3066 if (nolocks == FALSE) {
3067 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3068 if ( !vm_page_trylockspin_queues())
3069 return;
3070 } else
3071 vm_page_lockspin_queues();
3072
3073 VPL_LOCK(&lq->vpl_lock);
3074 }
3075 if (lq->vpl_count) {
3076 /*
3077 * Switch "local" pages to "active".
3078 */
3079 assert(!queue_empty(&lq->vpl_queue));
3080
3081 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3082 VM_PAGE_CHECK(m);
3083 assert(m->local);
3084 assert(!m->active);
3085 assert(!m->inactive);
3086 assert(!m->speculative);
3087 assert(!VM_PAGE_WIRED(m));
3088 assert(!m->throttled);
3089 assert(!m->fictitious);
3090
3091 if (m->local_id != lid)
3092 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3093
3094 m->local_id = 0;
3095 m->local = FALSE;
3096 m->active = TRUE;
3097 VM_PAGE_CHECK(m);
3098
3099 count++;
3100 }
3101 if (count != lq->vpl_count)
3102 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3103
3104 /*
3105 * Transfer the entire local queue to the regular LRU page queues.
3106 */
3107 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3108 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3109 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3110
3111 if (queue_empty(&vm_page_queue_active)) {
3112 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3113 } else {
3114 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3115 }
3116 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3117 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3118 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3119
3120 queue_init(&lq->vpl_queue);
3121 /*
3122 * Adjust the global page counts.
3123 */
3124 vm_page_active_count += lq->vpl_count;
3125 lq->vpl_count = 0;
3126 }
3127 assert(queue_empty(&lq->vpl_queue));
3128
3129 if (nolocks == FALSE) {
3130 VPL_UNLOCK(&lq->vpl_lock);
3131 vm_page_unlock_queues();
3132 }
3133 }
3134
3135 /*
3136 * vm_page_part_zero_fill:
3137 *
3138 * Zero-fill a part of the page.
3139 */
3140 void
3141 vm_page_part_zero_fill(
3142 vm_page_t m,
3143 vm_offset_t m_pa,
3144 vm_size_t len)
3145 {
3146 vm_page_t tmp;
3147
3148 VM_PAGE_CHECK(m);
3149 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3150 pmap_zero_part_page(m->phys_page, m_pa, len);
3151 #else
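/*
 * No pmap-level partial zero is available: emulate it by grabbing a
 * scratch page, zeroing it, copying the portions of the original page
 * that lie outside [m_pa, m_pa + len) into the scratch page, and then
 * copying the completed scratch page back over the original.
 */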
3152 while (1) {
3153 tmp = vm_page_grab();
3154 if (tmp == VM_PAGE_NULL) {
3155 vm_page_wait(THREAD_UNINT);
3156 continue;
3157 }
3158 break;
3159 }
3160 vm_page_zero_fill(tmp);
3161 if(m_pa != 0) {
3162 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3163 }
3164 if((m_pa + len) < PAGE_SIZE) {
3165 vm_page_part_copy(m, m_pa + len, tmp,
3166 m_pa + len, PAGE_SIZE - (m_pa + len));
3167 }
3168 vm_page_copy(tmp,m);
3169 VM_PAGE_FREE(tmp);
3170 #endif
3171
3172 }
3173
3174 /*
3175 * vm_page_zero_fill:
3176 *
3177 * Zero-fill the specified page.
3178 */
3179 void
3180 vm_page_zero_fill(
3181 vm_page_t m)
3182 {
3183 XPR(XPR_VM_PAGE,
3184 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3185 m->object, m->offset, m, 0,0);
3186
3187 VM_PAGE_CHECK(m);
3188
3189 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3190 pmap_zero_page(m->phys_page);
3191 }
3192
3193 /*
3194 * vm_page_part_copy:
3195 *
3196 * copy part of one page to another
3197 */
3198
3199 void
3200 vm_page_part_copy(
3201 vm_page_t src_m,
3202 vm_offset_t src_pa,
3203 vm_page_t dst_m,
3204 vm_offset_t dst_pa,
3205 vm_size_t len)
3206 {
3207 VM_PAGE_CHECK(src_m);
3208 VM_PAGE_CHECK(dst_m);
3209
3210 pmap_copy_part_page(src_m->phys_page, src_pa,
3211 dst_m->phys_page, dst_pa, len);
3212 }
3213
3214 /*
3215 * vm_page_copy:
3216 *
3217 * Copy one page to another
3218 *
3219 * ENCRYPTED SWAP:
3220 * The source page should not be encrypted. The caller should
3221 * make sure the page is decrypted first, if necessary.
3222 */
3223
3224 int vm_page_copy_cs_validations = 0;
3225 int vm_page_copy_cs_tainted = 0;
3226
3227 void
3228 vm_page_copy(
3229 vm_page_t src_m,
3230 vm_page_t dest_m)
3231 {
3232 XPR(XPR_VM_PAGE,
3233 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3234 src_m->object, src_m->offset,
3235 dest_m->object, dest_m->offset,
3236 0);
3237
3238 VM_PAGE_CHECK(src_m);
3239 VM_PAGE_CHECK(dest_m);
3240
3241 /*
3242 * ENCRYPTED SWAP:
3243 * The source page should not be encrypted at this point.
3244 * The destination page will therefore not contain encrypted
3245 * data after the copy.
3246 */
3247 if (src_m->encrypted) {
3248 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3249 }
3250 dest_m->encrypted = FALSE;
3251
3252 if (src_m->object != VM_OBJECT_NULL &&
3253 src_m->object->code_signed) {
3254 /*
3255 * We're copying a page from a code-signed object.
3256 * Whoever ends up mapping the copy page might care about
3257 * the original page's integrity, so let's validate the
3258 * source page now.
3259 */
3260 vm_page_copy_cs_validations++;
3261 vm_page_validate_cs(src_m);
3262 }
3263
3264 if (vm_page_is_slideable(src_m)) {
3265 boolean_t was_busy = src_m->busy;
3266 src_m->busy = TRUE;
3267 (void) vm_page_slide(src_m, 0);
3268 assert(src_m->busy);
3269 if(!was_busy) {
3270 PAGE_WAKEUP_DONE(src_m);
3271 }
3272 }
3273
3274 /*
3275 * Propagate the cs_tainted bit to the copy page. Do not propagate
3276 * the cs_validated bit.
3277 */
3278 dest_m->cs_tainted = src_m->cs_tainted;
3279 if (dest_m->cs_tainted) {
3280 vm_page_copy_cs_tainted++;
3281 }
3282 dest_m->slid = src_m->slid;
3283 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3284 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3285 }
3286
3287 #if MACH_ASSERT
3288 static void
3289 _vm_page_print(
3290 vm_page_t p)
3291 {
3292 printf("vm_page %p: \n", p);
3293 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3294 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3295 printf(" next=%p\n", p->next);
3296 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3297 printf(" wire_count=%u\n", p->wire_count);
3298
3299 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3300 (p->local ? "" : "!"),
3301 (p->inactive ? "" : "!"),
3302 (p->active ? "" : "!"),
3303 (p->pageout_queue ? "" : "!"),
3304 (p->speculative ? "" : "!"),
3305 (p->laundry ? "" : "!"));
3306 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3307 (p->free ? "" : "!"),
3308 (p->reference ? "" : "!"),
3309 (p->gobbled ? "" : "!"),
3310 (p->private ? "" : "!"),
3311 (p->throttled ? "" : "!"));
3312 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3313 (p->busy ? "" : "!"),
3314 (p->wanted ? "" : "!"),
3315 (p->tabled ? "" : "!"),
3316 (p->fictitious ? "" : "!"),
3317 (p->pmapped ? "" : "!"),
3318 (p->wpmapped ? "" : "!"));
3319 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3320 (p->pageout ? "" : "!"),
3321 (p->absent ? "" : "!"),
3322 (p->error ? "" : "!"),
3323 (p->dirty ? "" : "!"),
3324 (p->cleaning ? "" : "!"),
3325 (p->precious ? "" : "!"),
3326 (p->clustered ? "" : "!"));
3327 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3328 (p->overwriting ? "" : "!"),
3329 (p->restart ? "" : "!"),
3330 (p->unusual ? "" : "!"),
3331 (p->encrypted ? "" : "!"),
3332 (p->encrypted_cleaning ? "" : "!"));
3333 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3334 (p->list_req_pending ? "" : "!"),
3335 (p->dump_cleaning ? "" : "!"),
3336 (p->cs_validated ? "" : "!"),
3337 (p->cs_tainted ? "" : "!"),
3338 (p->no_cache ? "" : "!"));
3339 printf(" %szero_fill\n",
3340 (p->zero_fill ? "" : "!"));
3341
3342 printf("phys_page=0x%x\n", p->phys_page);
3343 }
3344
3345 /*
3346 * Check that the list of pages is ordered by
3347 * ascending physical address and has no holes.
3348 */
3349 static int
3350 vm_page_verify_contiguous(
3351 vm_page_t pages,
3352 unsigned int npages)
3353 {
3354 register vm_page_t m;
3355 unsigned int page_count;
3356 vm_offset_t prev_addr;
3357
3358 prev_addr = pages->phys_page;
3359 page_count = 1;
3360 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3361 if (m->phys_page != prev_addr + 1) {
3362 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3363 m, (long)prev_addr, m->phys_page);
3364 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3365 panic("vm_page_verify_contiguous: not contiguous!");
3366 }
3367 prev_addr = m->phys_page;
3368 ++page_count;
3369 }
3370 if (page_count != npages) {
3371 printf("pages %p actual count 0x%x but requested 0x%x\n",
3372 pages, page_count, npages);
3373 panic("vm_page_verify_contiguous: count error");
3374 }
3375 return 1;
3376 }
3377
3378
3379 /*
3380 * Check the free lists for proper length etc.
3381 */
3382 static unsigned int
3383 vm_page_verify_free_list(
3384 queue_head_t *vm_page_queue,
3385 unsigned int color,
3386 vm_page_t look_for_page,
3387 boolean_t expect_page)
3388 {
3389 unsigned int npages;
3390 vm_page_t m;
3391 vm_page_t prev_m;
3392 boolean_t found_page;
3393
3394 found_page = FALSE;
3395 npages = 0;
3396 prev_m = (vm_page_t) vm_page_queue;
3397 queue_iterate(vm_page_queue,
3398 m,
3399 vm_page_t,
3400 pageq) {
3401
3402 if (m == look_for_page) {
3403 found_page = TRUE;
3404 }
3405 if ((vm_page_t) m->pageq.prev != prev_m)
3406 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3407 color, npages, m, m->pageq.prev, prev_m);
3408 if ( ! m->busy )
3409 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3410 color, npages, m);
3411 if (color != (unsigned int) -1) {
3412 if ((m->phys_page & vm_color_mask) != color)
3413 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3414 color, npages, m, m->phys_page & vm_color_mask, color);
3415 if ( ! m->free )
3416 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3417 color, npages, m);
3418 }
3419 ++npages;
3420 prev_m = m;
3421 }
3422 if (look_for_page != VM_PAGE_NULL) {
3423 unsigned int other_color;
3424
3425 if (expect_page && !found_page) {
3426 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3427 color, npages, look_for_page, look_for_page->phys_page);
3428 _vm_page_print(look_for_page);
3429 for (other_color = 0;
3430 other_color < vm_colors;
3431 other_color++) {
3432 if (other_color == color)
3433 continue;
3434 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3435 other_color, look_for_page, FALSE);
3436 }
3437 if (color == (unsigned int) -1) {
3438 vm_page_verify_free_list(&vm_lopage_queue_free,
3439 (unsigned int) -1, look_for_page, FALSE);
3440 }
3441 panic("vm_page_verify_free_list(color=%u)\n", color);
3442 }
3443 if (!expect_page && found_page) {
3444 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3445 color, npages, look_for_page, look_for_page->phys_page);
3446 }
3447 }
3448 return npages;
3449 }
3450
3451 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3452 static void
3453 vm_page_verify_free_lists( void )
3454 {
3455 unsigned int color, npages, nlopages;
3456
3457 if (! vm_page_verify_free_lists_enabled)
3458 return;
3459
3460 npages = 0;
3461
3462 lck_mtx_lock(&vm_page_queue_free_lock);
3463
3464 for( color = 0; color < vm_colors; color++ ) {
3465 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3466 color, VM_PAGE_NULL, FALSE);
3467 }
3468 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3469 (unsigned int) -1,
3470 VM_PAGE_NULL, FALSE);
3471 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3472 panic("vm_page_verify_free_lists: "
3473 "npages %u free_count %d nlopages %u lo_free_count %u",
3474 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3475
3476 lck_mtx_unlock(&vm_page_queue_free_lock);
3477 }
3478
3479 void
3480 vm_page_queues_assert(
3481 vm_page_t mem,
3482 int val)
3483 {
3484 if (mem->free + mem->active + mem->inactive + mem->speculative +
3485 mem->throttled + mem->pageout_queue > (val)) {
3486 _vm_page_print(mem);
3487 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3488 }
3489 if (VM_PAGE_WIRED(mem)) {
3490 assert(!mem->active);
3491 assert(!mem->inactive);
3492 assert(!mem->speculative);
3493 assert(!mem->throttled);
3494 }
3495 }
3496 #endif /* MACH_ASSERT */
3497
3498
3499 /*
3500 * CONTIGUOUS PAGE ALLOCATION
3501 *
3502 * Find a region large enough to contain at least n pages
3503 * of contiguous physical memory.
3504 *
3505 * This is done by traversing the vm_page_t array in a linear fashion...
3506 * we assume that the vm_page_t array has the available physical pages in an
3507 * ordered, ascending list... this is currently true of all our implementations
3508 * and must remain so... there can be 'holes' in the array... we also can
3509 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3510 * which used to happen via 'vm_page_convert'... that function was no longer
3511 * being called and was removed...
3512 *
3513 * The basic flow consists of stabilizing some of the interesting state of
3514 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3515 * sweep at the beginning of the array looking for pages that meet our criteria
3516 * for a 'stealable' page... currently we are pretty conservative... if the page
3517 * meets these criteria and is physically contiguous to the previous page in the 'run'
3518 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3519 * and start to develop a new run... if at this point we've already considered
3520 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3521 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3522 * to other threads trying to acquire free pages (or move pages from q to q),
3523 * and then continue from the spot we left off... we only make 1 pass through the
3524 * array. Once we have a 'run' that is long enough, we'll go into the loop
3525 * which steals the pages from the queues they're currently on... pages on the free
3526 * queue can be stolen directly... pages that are on any of the other queues
3527 * must be removed from the object they are tabled on... this requires taking the
3528 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3529 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3530 * dump the pages we've currently stolen back to the free list, and pick up our
3531 * scan from the point where we aborted the 'current' run.
3532 *
3533 *
3534 * Requirements:
3535 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3536 *
3537 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3538 *
3539 * Algorithm:
3540 */
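/*
 * Rough shape of the scan below (an illustrative sketch, not a second
 * implementation):
 *
 *	for (page_idx = last_idx; npages < contig_pages; page_idx++) {
 *		if (page is unstealable, or not contiguous with the run)
 *			RESET_STATE_OF_RUN();
 *		else
 *			extend the current run;
 *		if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
 *			drop both locks, mutex_pause(), retake the locks,
 *			RESET_STATE_OF_RUN();
 *		}
 *	}
 */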
3541
3542 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3543
3544
3545 #define RESET_STATE_OF_RUN() \
3546 MACRO_BEGIN \
3547 prevcontaddr = -2; \
3548 start_pnum = -1; \
3549 free_considered = 0; \
3550 substitute_needed = 0; \
3551 npages = 0; \
3552 MACRO_END
3553
3554 /*
3555 * Can we steal in-use (i.e. not free) pages when searching for
3556 * physically-contiguous pages?
3557 */
3558 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3559
3560 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3561 #if DEBUG
3562 int vm_page_find_contig_debug = 0;
3563 #endif
3564
3565 static vm_page_t
3566 vm_page_find_contiguous(
3567 unsigned int contig_pages,
3568 ppnum_t max_pnum,
3569 ppnum_t pnum_mask,
3570 boolean_t wire,
3571 int flags)
3572 {
3573 vm_page_t m = NULL;
3574 ppnum_t prevcontaddr;
3575 ppnum_t start_pnum;
3576 unsigned int npages, considered, scanned;
3577 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3578 unsigned int idx_last_contig_page_found = 0;
3579 int free_considered, free_available;
3580 int substitute_needed;
3581 boolean_t wrapped;
3582 #if DEBUG
3583 clock_sec_t tv_start_sec, tv_end_sec;
3584 clock_usec_t tv_start_usec, tv_end_usec;
3585 #endif
3586 #if MACH_ASSERT
3587 int yielded = 0;
3588 int dumped_run = 0;
3589 int stolen_pages = 0;
3590 #endif
3591
3592 if (contig_pages == 0)
3593 return VM_PAGE_NULL;
3594
3595 #if MACH_ASSERT
3596 vm_page_verify_free_lists();
3597 #endif
3598 #if DEBUG
3599 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3600 #endif
3601 vm_page_lock_queues();
3602 lck_mtx_lock(&vm_page_queue_free_lock);
3603
3604 RESET_STATE_OF_RUN();
3605
3606 scanned = 0;
3607 considered = 0;
3608 free_available = vm_page_free_count - vm_page_free_reserved;
3609
3610 wrapped = FALSE;
3611
3612 if(flags & KMA_LOMEM)
3613 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3614 else
3615 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3616
3617 orig_last_idx = idx_last_contig_page_found;
3618 last_idx = orig_last_idx;
3619
3620 for (page_idx = last_idx, start_idx = last_idx;
3621 npages < contig_pages && page_idx < vm_pages_count;
3622 page_idx++) {
3623 retry:
3624 if (wrapped &&
3625 npages == 0 &&
3626 page_idx >= orig_last_idx) {
3627 /*
3628 * We're back where we started and we haven't
3629 * found any suitable contiguous range. Let's
3630 * give up.
3631 */
3632 break;
3633 }
3634 scanned++;
3635 m = &vm_pages[page_idx];
3636
3637 assert(!m->fictitious);
3638 assert(!m->private);
3639
3640 if (max_pnum && m->phys_page > max_pnum) {
3641 /* no more low pages... */
3642 break;
3643 }
3644 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3645 /*
3646 * not aligned
3647 */
3648 RESET_STATE_OF_RUN();
3649
3650 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3651 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3652 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3653 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
3654 m->pageout) {
3655 /*
3656 * page is in a transient state
3657 * or a state we don't want to deal
3658 * with, so don't consider it which
3659 * means starting a new run
3660 */
3661 RESET_STATE_OF_RUN();
3662
3663 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3664 /*
3665 * page needs to be on one of our queues
3666 * in order for it to be stable behind the
3667 * locks we hold at this point...
3668 * if not, don't consider it which
3669 * means starting a new run
3670 */
3671 RESET_STATE_OF_RUN();
3672
3673 } else if (!m->free && (!m->tabled || m->busy)) {
3674 /*
3675 * pages on the free list are always 'busy'
3676 * so we couldn't test for 'busy' in the check
3677 * for the transient states... pages that are
3678 * 'free' are never 'tabled', so we also couldn't
3679 * test for 'tabled'. So we check here to make
3680 * sure that a non-free page is not busy and is
3681 * tabled on an object...
3682 * if not, don't consider it which
3683 * means starting a new run
3684 */
3685 RESET_STATE_OF_RUN();
3686
3687 } else {
3688 if (m->phys_page != prevcontaddr + 1) {
3689 if ((m->phys_page & pnum_mask) != 0) {
3690 RESET_STATE_OF_RUN();
3691 goto did_consider;
3692 } else {
3693 npages = 1;
3694 start_idx = page_idx;
3695 start_pnum = m->phys_page;
3696 }
3697 } else {
3698 npages++;
3699 }
3700 prevcontaddr = m->phys_page;
3701
3702 VM_PAGE_CHECK(m);
3703 if (m->free) {
3704 free_considered++;
3705 } else {
3706 /*
3707 * This page is not free.
3708 * If we can't steal used pages,
3709 * we have to give up this run
3710 * and keep looking.
3711 * Otherwise, we might need to
3712 * move the contents of this page
3713 * into a substitute page.
3714 */
3715 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3716 if (m->pmapped || m->dirty) {
3717 substitute_needed++;
3718 }
3719 #else
3720 RESET_STATE_OF_RUN();
3721 #endif
3722 }
3723
3724 if ((free_considered + substitute_needed) > free_available) {
3725 /*
3726 * if we let this run continue
3727 * we will end up dropping the vm_page_free_count
3728 * below the reserve limit... we need to abort
3729 * this run, but we can at least re-consider this
3730 * page... thus the jump back to 'retry'
3731 */
3732 RESET_STATE_OF_RUN();
3733
3734 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3735 considered++;
3736 goto retry;
3737 }
3738 /*
3739 * free_available == 0
3740 * so can't consider any free pages... if
3741 * we went to retry in this case, we'd
3742 * get stuck looking at the same page
3743 * w/o making any forward progress...
3744 * we also want to take this path if we've already
3745 * reached our limit that controls the lock latency
3746 */
3747 }
3748 }
3749 did_consider:
3750 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3751
3752 lck_mtx_unlock(&vm_page_queue_free_lock);
3753 vm_page_unlock_queues();
3754
3755 mutex_pause(0);
3756
3757 vm_page_lock_queues();
3758 lck_mtx_lock(&vm_page_queue_free_lock);
3759
3760 RESET_STATE_OF_RUN();
3761 /*
3762 * reset our free page limit since we
3763 * dropped the lock protecting the vm_page_free_queue
3764 */
3765 free_available = vm_page_free_count - vm_page_free_reserved;
3766 considered = 0;
3767 #if MACH_ASSERT
3768 yielded++;
3769 #endif
3770 goto retry;
3771 }
3772 considered++;
3773 }
3774 m = VM_PAGE_NULL;
3775
3776 if (npages != contig_pages) {
3777 if (!wrapped) {
3778 /*
3779 * We didn't find a contiguous range but we didn't
3780 * start from the very first page.
3781 * Start again from the very first page.
3782 */
3783 RESET_STATE_OF_RUN();
3784 if( flags & KMA_LOMEM)
3785 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3786 else
3787 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3788 last_idx = 0;
3789 page_idx = last_idx;
3790 wrapped = TRUE;
3791 goto retry;
3792 }
3793 lck_mtx_unlock(&vm_page_queue_free_lock);
3794 } else {
3795 vm_page_t m1;
3796 vm_page_t m2;
3797 unsigned int cur_idx;
3798 unsigned int tmp_start_idx;
3799 vm_object_t locked_object = VM_OBJECT_NULL;
3800 boolean_t abort_run = FALSE;
3801
3802 assert(page_idx - start_idx == contig_pages);
3803
3804 tmp_start_idx = start_idx;
3805
3806 /*
3807 * first pass through to pull the free pages
3808 * off of the free queue so that in case we
3809 * need substitute pages, we won't grab any
3810 * of the free pages in the run... we clear
3811 * the 'free' bit in this 1st pass, and even in
3812 * an abort_run case, we'll collect all of the
3813 * free pages in this run and return them to the free list
3814 */
3815 while (start_idx < page_idx) {
3816
3817 m1 = &vm_pages[start_idx++];
3818
3819 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3820 assert(m1->free);
3821 #endif
3822
3823 if (m1->free) {
3824 unsigned int color;
3825
3826 color = m1->phys_page & vm_color_mask;
3827 #if MACH_ASSERT
3828 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
3829 #endif
3830 queue_remove(&vm_page_queue_free[color],
3831 m1,
3832 vm_page_t,
3833 pageq);
3834 m1->pageq.next = NULL;
3835 m1->pageq.prev = NULL;
3836 #if MACH_ASSERT
3837 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
3838 #endif
3839 /*
3840 * Clear the "free" bit so that this page
3841 * does not get considered for another
3842 * concurrent physically-contiguous allocation.
3843 */
3844 m1->free = FALSE;
3845 assert(m1->busy);
3846
3847 vm_page_free_count--;
3848 }
3849 }
3850 /*
3851 * adjust global freelist counts
3852 */
3853 if (vm_page_free_count < vm_page_free_count_minimum)
3854 vm_page_free_count_minimum = vm_page_free_count;
3855
3856 if( flags & KMA_LOMEM)
3857 vm_page_lomem_find_contiguous_last_idx = page_idx;
3858 else
3859 vm_page_find_contiguous_last_idx = page_idx;
3860
3861 /*
3862 * we can drop the free queue lock at this point since
3863 * we've pulled any 'free' candidates off of the list...
3864 * we need it dropped so that we can do a vm_page_grab
3865 * when substituting for pmapped/dirty pages
3866 */
3867 lck_mtx_unlock(&vm_page_queue_free_lock);
3868
3869 start_idx = tmp_start_idx;
3870 cur_idx = page_idx - 1;
3871
3872 while (start_idx++ < page_idx) {
3873 /*
3874 * must go through the list from back to front
3875 * so that the page list is created in the
3876 * correct order - low -> high phys addresses
3877 */
3878 m1 = &vm_pages[cur_idx--];
3879
3880 assert(!m1->free);
3881 if (m1->object == VM_OBJECT_NULL) {
3882 /*
3883 * page has already been removed from
3884 * the free list in the 1st pass
3885 */
3886 assert(m1->offset == (vm_object_offset_t) -1);
3887 assert(m1->busy);
3888 assert(!m1->wanted);
3889 assert(!m1->laundry);
3890 } else {
3891 vm_object_t object;
3892
3893 if (abort_run == TRUE)
3894 continue;
3895
3896 object = m1->object;
3897
3898 if (object != locked_object) {
3899 if (locked_object) {
3900 vm_object_unlock(locked_object);
3901 locked_object = VM_OBJECT_NULL;
3902 }
3903 if (vm_object_lock_try(object))
3904 locked_object = object;
3905 }
3906 if (locked_object == VM_OBJECT_NULL ||
3907 (VM_PAGE_WIRED(m1) || m1->gobbled ||
3908 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3909 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3910 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3911
3912 if (locked_object) {
3913 vm_object_unlock(locked_object);
3914 locked_object = VM_OBJECT_NULL;
3915 }
3916 tmp_start_idx = cur_idx;
3917 abort_run = TRUE;
3918 continue;
3919 }
3920 if (m1->pmapped || m1->dirty) {
3921 int refmod;
3922 vm_object_offset_t offset;
3923
3924 m2 = vm_page_grab();
3925
3926 if (m2 == VM_PAGE_NULL) {
3927 if (locked_object) {
3928 vm_object_unlock(locked_object);
3929 locked_object = VM_OBJECT_NULL;
3930 }
3931 tmp_start_idx = cur_idx;
3932 abort_run = TRUE;
3933 continue;
3934 }
3935 if (m1->pmapped)
3936 refmod = pmap_disconnect(m1->phys_page);
3937 else
3938 refmod = 0;
3939 vm_page_copy(m1, m2);
3940
3941 m2->reference = m1->reference;
3942 m2->dirty = m1->dirty;
3943
3944 if (refmod & VM_MEM_REFERENCED)
3945 m2->reference = TRUE;
3946 if (refmod & VM_MEM_MODIFIED)
3947 m2->dirty = TRUE;
3948 offset = m1->offset;
3949
3950 /*
3951 * completely cleans up the state
3952 * of the page so that it is ready
3953 * to be put onto the free list, or
3954 * for this purpose it looks like it
3955 * just came off of the free list
3956 */
3957 vm_page_free_prepare(m1);
3958
3959 /*
3960 * make sure we clear the ref/mod state
3961 * from the pmap layer... else we risk
3962 * inheriting state from the last time
3963 * this page was used...
3964 */
3965 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
3966 /*
3967 * now put the substitute page on the object
3968 */
3969 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);
3970
3971 if (m2->reference)
3972 vm_page_activate(m2);
3973 else
3974 vm_page_deactivate(m2);
3975
3976 PAGE_WAKEUP_DONE(m2);
3977
3978 } else {
3979 /*
3980 * completely cleans up the state
3981 * of the page so that it is ready
3982 * to be put onto the free list, or
3983 * for this purpose it looks like it
3984 * just came off of the free list
3985 */
3986 vm_page_free_prepare(m1);
3987 }
3988 #if MACH_ASSERT
3989 stolen_pages++;
3990 #endif
3991 }
3992 m1->pageq.next = (queue_entry_t) m;
3993 m1->pageq.prev = NULL;
3994 m = m1;
3995 }
3996 if (locked_object) {
3997 vm_object_unlock(locked_object);
3998 locked_object = VM_OBJECT_NULL;
3999 }
4000
4001 if (abort_run == TRUE) {
4002 if (m != VM_PAGE_NULL) {
4003 vm_page_free_list(m, FALSE);
4004 }
4005 #if MACH_ASSERT
4006 dumped_run++;
4007 #endif
4008 /*
4009 * tmp_start_idx was captured from cur_idx
4010 * after its auto-decrement, so it sits one
4011 * below the page that forced the abort...
4012 * add 1 to undo the auto-decrement and 1 more
4013 * to step past that page, so the scan resumes just beyond it
4014 */
4015 page_idx = tmp_start_idx + 2;
4016 if (page_idx >= vm_pages_count) {
4017 if (wrapped)
4018 goto done_scanning;
4019 page_idx = last_idx = 0;
4020 wrapped = TRUE;
4021 }
4022 abort_run = FALSE;
4023
4024 /*
4025 * The run was aborted... clear the scan state
4026 * and resume the search at the page_idx computed
4027 * above (wrapped back to 0 if we ran off the end).
4028 */
4029 RESET_STATE_OF_RUN();
4030
4031 if( flags & KMA_LOMEM)
4032 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4033 else
4034 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4035
4036 last_idx = page_idx;
4037
4038 lck_mtx_lock(&vm_page_queue_free_lock);
4039 /*
4040 * reset our free page limit since we
4041 * dropped the lock protecting the vm_page_free_queue
4042 */
4043 free_available = vm_page_free_count - vm_page_free_reserved;
4044 goto retry;
4045 }
4046
4047 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4048
4049 if (wire == TRUE)
4050 m1->wire_count++;
4051 else
4052 m1->gobbled = TRUE;
4053 }
4054 if (wire == FALSE)
4055 vm_page_gobble_count += npages;
4056
4057 /*
4058 * gobbled pages are also counted as wired pages
4059 */
4060 vm_page_wire_count += npages;
4061
4062 assert(vm_page_verify_contiguous(m, npages));
4063 }
4064 done_scanning:
4065 vm_page_unlock_queues();
4066
4067 #if DEBUG
4068 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4069
4070 tv_end_sec -= tv_start_sec;
4071 if (tv_end_usec < tv_start_usec) {
4072 tv_end_sec--;
4073 tv_end_usec += 1000000;
4074 }
4075 tv_end_usec -= tv_start_usec;
4076 if (tv_end_usec >= 1000000) {
4077 tv_end_sec++;
4078 tv_end_usec -= 1000000;
4079 }
4080 if (vm_page_find_contig_debug) {
4081 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4082 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4083 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4084 scanned, yielded, dumped_run, stolen_pages);
4085 }
4086
4087 #endif
4088 #if MACH_ASSERT
4089 vm_page_verify_free_lists();
4090 #endif
4091 return m;
4092 }
4093
4094 /*
4095 * Allocate a list of contiguous, wired pages.
4096 */
4097 kern_return_t
4098 cpm_allocate(
4099 vm_size_t size,
4100 vm_page_t *list,
4101 ppnum_t max_pnum,
4102 ppnum_t pnum_mask,
4103 boolean_t wire,
4104 int flags)
4105 {
4106 vm_page_t pages;
4107 unsigned int npages;
4108
4109 if (size % PAGE_SIZE != 0)
4110 return KERN_INVALID_ARGUMENT;
4111
4112 npages = (unsigned int) (size / PAGE_SIZE);
4113 if (npages != size / PAGE_SIZE) {
4114 /* 32-bit overflow */
4115 return KERN_INVALID_ARGUMENT;
4116 }
4117
4118 /*
4119 * Obtain a pointer to a subset of the free
4120 * list large enough to satisfy the request;
4121 * the region will be physically contiguous.
4122 */
4123 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4124
4125 if (pages == VM_PAGE_NULL)
4126 return KERN_NO_SPACE;
4127 /*
4128 * determine need for wakeups
4129 */
4130 if ((vm_page_free_count < vm_page_free_min) ||
4131 ((vm_page_free_count < vm_page_free_target) &&
4132 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4133 thread_wakeup((event_t) &vm_page_free_wanted);
4134
4135 VM_CHECK_MEMORYSTATUS;
4136
4137 /*
4138 * The CPM pages should now be available and
4139 * ordered by ascending physical address.
4140 */
4141 assert(vm_page_verify_contiguous(pages, npages));
4142
4143 *list = pages;
4144 return KERN_SUCCESS;
4145 }
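/*
 * Illustrative sketch only (kept as a comment, not compiled): one way a
 * caller might use cpm_allocate() to back a physically contiguous buffer.
 * The local names (contig_size, page_list, p) are made up for the example;
 * passing 0 for max_pnum and pnum_mask is meant here as "no physical
 * address constraint" -- see vm_page_find_contiguous() for how those
 * arguments are actually interpreted in this version.
 *
 *	vm_size_t	contig_size = 16 * PAGE_SIZE;
 *	vm_page_t	page_list;
 *	vm_page_t	p;
 *
 *	if (cpm_allocate(contig_size, &page_list, 0, 0, TRUE, 0) == KERN_SUCCESS) {
 *		// pages come back wired (wire == TRUE), linked through
 *		// pageq.next and ordered by ascending physical address
 *		for (p = page_list; p != VM_PAGE_NULL; p = NEXT_PAGE(p))
 *			printf("ppnum 0x%x\n", vm_page_get_phys_page(p));
 *	}
 */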
4146
4147
4148 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4149
4150 /*
4151 * when working on a 'run' of pages, it is necessary to hold
4152 * the vm_page_queue_lock (a hot global lock) for certain operations
4153 * on the page... however, the majority of the work can be done
4154 * while merely holding the object lock... in fact there are certain
4155 * collections of pages that don't require any work brokered by the
4156 * vm_page_queue_lock at all... to mitigate the time spent behind the
4157 * global lock, we use a 2-pass algorithm... collect up to DELAYED_WORK_LIMIT
4158 * pages while doing all of the work that doesn't require the vm_page_queue_lock...
4159 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4160 * remaining work for each page... we grab the busy bit on the page
4161 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4162 * when it can't immediately take the vm_page_queue_lock... this lets it compete
4163 * for the locks in the same order that vm_pageout_scan takes them.
4164 * the DW_* operation names are modeled after the routines they stand in
4165 * for, so the intent of each change stays obvious when reading the
4166 * original loop
4167 */
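/*
 * Illustrative sketch only (kept as a comment, not compiled): one way a
 * caller can batch pages for vm_page_do_delayed_work().  The dw_array,
 * dw_limit and dw_count names below are assumptions made for the example;
 * only struct vm_page_delayed_work, its dw_m / dw_mask fields, the DW_*
 * masks and DEFAULT_DELAYED_WORK_LIMIT come from the surrounding code.
 *
 *	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
 *	struct vm_page_delayed_work	*dwp = &dw_array[0];
 *	int				dw_count = 0;
 *	int				dw_limit = DEFAULT_DELAYED_WORK_LIMIT;
 *
 *	// with the object lock held, do the per-page work that doesn't
 *	// need the vm_page_queue_lock, then record the queue-level work
 *	dwp->dw_m = m;
 *	dwp->dw_mask = DW_vm_page_activate | DW_clear_busy | DW_PAGE_WAKEUP;
 *	dwp++;
 *	if (++dw_count >= dw_limit) {
 *		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 *		dwp = &dw_array[0];
 *		dw_count = 0;
 *	}
 *	...
 *	if (dw_count)
 *		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 */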
4168
4169 void
4170 vm_page_do_delayed_work(
4171 vm_object_t object,
4172 struct vm_page_delayed_work *dwp,
4173 int dw_count)
4174 {
4175 int j;
4176 vm_page_t m;
4177 vm_page_t local_free_q = VM_PAGE_NULL;
4178 boolean_t dropped_obj_lock = FALSE;
4179
4180 /*
4181 * pageout_scan takes the vm_page_lock_queues first
4182 * then tries for the object lock... to avoid what
4183 * is effectively a lock inversion, we'll go to the
4184 * trouble of taking them in that same order... otherwise
4185 * if this object contains the majority of the pages resident
4186 * in the UBC (or a small set of large objects actively being
4187 * worked on contain the majority of the pages), we could
4188 * cause the pageout_scan thread to 'starve' in its attempt
4189 * to find pages to move to the free queue, since it has to
4190 * successfully acquire the object lock of any candidate page
4191 * before it can steal/clean it.
4192 */
4193 if (!vm_page_trylockspin_queues()) {
4194 vm_object_unlock(object);
4195
4196 vm_page_lockspin_queues();
4197
4198 for (j = 0; ; j++) {
4199 if (!vm_object_lock_avoid(object) &&
4200 _vm_object_lock_try(object))
4201 break;
4202 vm_page_unlock_queues();
4203 mutex_pause(j);
4204 vm_page_lockspin_queues();
4205 }
4206 dropped_obj_lock = TRUE;
4207 }
4208 for (j = 0; j < dw_count; j++, dwp++) {
4209
4210 m = dwp->dw_m;
4211
4212 if (dwp->dw_mask & DW_set_list_req_pending) {
4213 m->list_req_pending = TRUE;
4214
4215 if (dropped_obj_lock == TRUE) {
4216 /*
4217 * need to make sure anyone that might have
4218 * blocked on busy == TRUE when we dropped
4219 * the object lock gets a chance to re-evaluate
4220 * its state since we have several places
4221 * where we avoid potential deadlocks with
4222 * the filesystem by stealing pages with
4223 * list_req_pending == TRUE and busy == TRUE
4224 */
4225 dwp->dw_mask |= DW_PAGE_WAKEUP;
4226 }
4227 }
4228 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4229 vm_pageout_throttle_up(m);
4230
4231 if (dwp->dw_mask & DW_vm_page_wire)
4232 vm_page_wire(m);
4233 else if (dwp->dw_mask & DW_vm_page_unwire) {
4234 boolean_t queueit;
4235
4236 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4237
4238 vm_page_unwire(m, queueit);
4239 }
4240 if (dwp->dw_mask & DW_vm_page_free) {
4241 vm_page_free_prepare_queues(m);
4242
4243 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4244 /*
4245 * Add this page to our list of reclaimed pages,
4246 * to be freed later.
4247 */
4248 m->pageq.next = (queue_entry_t) local_free_q;
4249 local_free_q = m;
4250 } else {
4251 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4252 vm_page_deactivate_internal(m, FALSE);
4253 else if (dwp->dw_mask & DW_vm_page_activate) {
4254 if (m->active == FALSE) {
4255 vm_page_activate(m);
4256 }
4257 }
4258 else if (dwp->dw_mask & DW_vm_page_speculate)
4259 vm_page_speculate(m, TRUE);
4260 else if (dwp->dw_mask & DW_vm_page_lru)
4261 vm_page_lru(m);
4262 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE)
4263 VM_PAGE_QUEUES_REMOVE(m);
4264
4265 if (dwp->dw_mask & DW_set_reference)
4266 m->reference = TRUE;
4267 else if (dwp->dw_mask & DW_clear_reference)
4268 m->reference = FALSE;
4269
4270 if (dwp->dw_mask & DW_move_page) {
4271 VM_PAGE_QUEUES_REMOVE(m);
4272
4273 assert(!m->laundry);
4274 assert(m->object != kernel_object);
4275 assert(m->pageq.next == NULL &&
4276 m->pageq.prev == NULL);
4277
4278 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4279 }
4280 if (dwp->dw_mask & DW_clear_busy)
4281 m->busy = FALSE;
4282
4283 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4284 PAGE_WAKEUP(m);
4285 }
4286 }
4287 vm_page_unlock_queues();
4288
4289 if (local_free_q)
4290 vm_page_free_list(local_free_q, TRUE);
4291
4292 VM_CHECK_MEMORYSTATUS;
4293
4294 }
4295
4296
4297
4298
4299 void vm_check_memorystatus(void)
4300 {
4301 #if CONFIG_EMBEDDED
4302 static boolean_t in_critical = FALSE;
4303 static unsigned int last_memorystatus = 0;
4304 unsigned int pages_avail;
4305
4306 if (!kern_memorystatus_delta) {
4307 return;
4308 }
4309
4310 pages_avail = (vm_page_active_count +
4311 vm_page_inactive_count +
4312 vm_page_speculative_count +
4313 vm_page_free_count +
4314 (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) ? 0 : vm_page_purgeable_count));
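/*
 * notify the memorystatus thread when we first drop below the
 * delta (entering the critical range), or whenever pages_avail
 * has moved up or down by at least kern_memorystatus_delta
 * since the last notification
 */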
4315 if ( (!in_critical && (pages_avail < kern_memorystatus_delta)) ||
4316 (pages_avail >= (last_memorystatus + kern_memorystatus_delta)) ||
4317 (last_memorystatus >= (pages_avail + kern_memorystatus_delta)) ) {
4318 kern_memorystatus_level = pages_avail * 100 / atop_64(max_mem);
4319 last_memorystatus = pages_avail;
4320
4321 thread_wakeup((event_t)&kern_memorystatus_wakeup);
4322
4323 in_critical = (pages_avail < kern_memorystatus_delta) ? TRUE : FALSE;
4324 }
4325 #endif
4326 }
4327
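/*
 * Grab page_count low-memory pages (KMA_LOMEM is required) and return
 * them as a list linked through pageq.next, with the most recently
 * grabbed page at the head.  On a shortage, everything grabbed so far
 * is returned to the free list and KERN_RESOURCE_SHORTAGE is reported.
 */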
4328 kern_return_t
4329 vm_page_alloc_list(
4330 int page_count,
4331 int flags,
4332 vm_page_t *list)
4333 {
4334 vm_page_t lo_page_list = VM_PAGE_NULL;
4335 vm_page_t mem;
4336 int i;
4337
4338 if ( !(flags & KMA_LOMEM))
4339 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4340
4341 for (i = 0; i < page_count; i++) {
4342
4343 mem = vm_page_grablo();
4344
4345 if (mem == VM_PAGE_NULL) {
4346 if (lo_page_list)
4347 vm_page_free_list(lo_page_list, FALSE);
4348
4349 *list = VM_PAGE_NULL;
4350
4351 return (KERN_RESOURCE_SHORTAGE);
4352 }
4353 mem->pageq.next = (queue_entry_t) lo_page_list;
4354 lo_page_list = mem;
4355 }
4356 *list = lo_page_list;
4357
4358 return (KERN_SUCCESS);
4359 }
4360
4361 void
4362 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4363 {
4364 page->offset = offset;
4365 }
4366
4367 vm_page_t
4368 vm_page_get_next(vm_page_t page)
4369 {
4370 return ((vm_page_t) page->pageq.next);
4371 }
4372
4373 vm_object_offset_t
4374 vm_page_get_offset(vm_page_t page)
4375 {
4376 return (page->offset);
4377 }
4378
4379 ppnum_t
4380 vm_page_get_phys_page(vm_page_t page)
4381 {
4382 return (page->phys_page);
4383 }
4384
4385
4386 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4387
4388 #if HIBERNATION
4389
4390 static vm_page_t hibernate_gobble_queue;
4391
4392 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4393
4394 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4395 static int hibernate_flush_dirty_pages(void);
4396 static int hibernate_flush_queue(queue_head_t *, int);
4397 static void hibernate_dirty_page(vm_page_t);
4398
4399 void hibernate_flush_wait(void);
4400 void hibernate_mark_in_progress(void);
4401 void hibernate_clear_in_progress(void);
4402
4403
4404 struct hibernate_statistics {
4405 int hibernate_considered;
4406 int hibernate_reentered_on_q;
4407 int hibernate_found_dirty;
4408 int hibernate_skipped_cleaning;
4409 int hibernate_skipped_transient;
4410 int hibernate_skipped_precious;
4411 int hibernate_queue_nolock;
4412 int hibernate_queue_paused;
4413 int hibernate_throttled;
4414 int hibernate_throttle_timeout;
4415 int hibernate_drained;
4416 int hibernate_drain_timeout;
4417 int cd_lock_failed;
4418 int cd_found_precious;
4419 int cd_found_wired;
4420 int cd_found_busy;
4421 int cd_found_unusual;
4422 int cd_found_cleaning;
4423 int cd_found_laundry;
4424 int cd_found_dirty;
4425 int cd_local_free;
4426 int cd_total_free;
4427 int cd_vm_page_wire_count;
4428 int cd_pages;
4429 int cd_discarded;
4430 int cd_count_wire;
4431 } hibernate_stats;
4432
4433
4434
4435 static int
4436 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4437 {
4438 wait_result_t wait_result;
4439
4440 vm_page_lock_queues();
4441
4442 while (q->pgo_laundry) {
4443
4444 q->pgo_draining = TRUE;
4445
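/*
 * wait (up to 5 seconds per pass) for the laundry on this queue
 * to finish... the drain wakeup is posted on (&q->pgo_laundry + 1),
 * a distinct event from the throttle wait on &q->pgo_laundry itself
 */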
4446 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4447
4448 vm_page_unlock_queues();
4449
4450 wait_result = thread_block(THREAD_CONTINUE_NULL);
4451
4452 if (wait_result == THREAD_TIMED_OUT) {
4453 hibernate_stats.hibernate_drain_timeout++;
4454 return (1);
4455 }
4456 vm_page_lock_queues();
4457
4458 hibernate_stats.hibernate_drained++;
4459 }
4460 vm_page_unlock_queues();
4461
4462 return (0);
4463 }
4464
4465 static void
4466 hibernate_dirty_page(vm_page_t m)
4467 {
4468 vm_object_t object = m->object;
4469 struct vm_pageout_queue *q;
4470
4471 #if DEBUG
4472 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4473 #endif
4474 vm_object_lock_assert_exclusive(object);
4475
4476 /*
4477 * protect the object from collapse -
4478 * locking in the object's paging_offset.
4479 */
4480 vm_object_paging_begin(object);
4481
4482 m->list_req_pending = TRUE;
4483 m->cleaning = TRUE;
4484 m->busy = TRUE;
4485
4486 if (object->internal == TRUE)
4487 q = &vm_pageout_queue_internal;
4488 else
4489 q = &vm_pageout_queue_external;
4490
4491 /*
4492 * pgo_laundry count is tied to the laundry bit
4493 */
4494 m->laundry = TRUE;
4495 q->pgo_laundry++;
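/*
 * hibernate_drain_pageout_queue() later waits for pgo_laundry
 * to drop back to zero once the pageout thread has processed
 * these pages
 */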
4496
4497 m->pageout_queue = TRUE;
4498 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
4499
4500 if (q->pgo_idle == TRUE) {
4501 q->pgo_idle = FALSE;
4502 thread_wakeup((event_t) &q->pgo_pending);
4503 }
4504 }
4505
4506 static int
4507 hibernate_flush_queue(queue_head_t *q, int qcount)
4508 {
4509 vm_page_t m;
4510 vm_object_t l_object = NULL;
4511 vm_object_t m_object = NULL;
4512 int refmod_state = 0;
4513 int try_failed_count = 0;
4514 int retval = 0;
4515 int current_run = 0;
4516 struct vm_pageout_queue *iq;
4517 struct vm_pageout_queue *eq;
4518 struct vm_pageout_queue *tq;
4519
4520
4521 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4522
4523 iq = &vm_pageout_queue_internal;
4524 eq = &vm_pageout_queue_external;
4525
4526 vm_page_lock_queues();
4527
4528 while (qcount && !queue_empty(q)) {
4529
4530 if (current_run++ == 1000) {
4531 if (hibernate_should_abort()) {
4532 retval = 1;
4533 break;
4534 }
4535 current_run = 0;
4536 }
4537
4538 m = (vm_page_t) queue_first(q);
4539 m_object = m->object;
4540
4541 /*
4542 * check to see if we currently are working
4543 * with the same object... if so, we've
4544 * already got the lock
4545 */
4546 if (m_object != l_object) {
4547 /*
4548 * the object associated with candidate page is
4549 * different from the one we were just working
4550 * with... dump the lock if we still own it
4551 */
4552 if (l_object != NULL) {
4553 vm_object_unlock(l_object);
4554 l_object = NULL;
4555 }
4556 /*
4557 * Try to lock object; since we've already got the
4558 * page queues lock, we can only 'try' for this one.
4559 * If the 'try' fails, we need to do a mutex_pause
4560 * to allow the owner of the object lock a chance to
4561 * run...
4562 */
4563 if ( !vm_object_lock_try_scan(m_object)) {
4564
4565 if (try_failed_count > 20) {
4566 hibernate_stats.hibernate_queue_nolock++;
4567
4568 goto reenter_pg_on_q;
4569 }
4570 vm_pageout_scan_wants_object = m_object;
4571
4572 vm_page_unlock_queues();
4573 mutex_pause(try_failed_count++);
4574 vm_page_lock_queues();
4575
4576 hibernate_stats.hibernate_queue_paused++;
4577 continue;
4578 } else {
4579 l_object = m_object;
4580 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4581 }
4582 }
4583 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->busy || m->absent || m->error) {
4584 /*
4585 * page is not to be cleaned
4586 * put it back on the head of its queue
4587 */
4588 if (m->cleaning)
4589 hibernate_stats.hibernate_skipped_cleaning++;
4590 else
4591 hibernate_stats.hibernate_skipped_transient++;
4592
4593 goto reenter_pg_on_q;
4594 }
4595 if ( !m_object->pager_initialized && m_object->pager_created)
4596 goto reenter_pg_on_q;
4597
4598 if (m_object->copy == VM_OBJECT_NULL) {
4599 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4600 /*
4601 * let the normal hibernate image path
4602 * deal with these
4603 */
4604 goto reenter_pg_on_q;
4605 }
4606 }
4607 if ( !m->dirty && m->pmapped) {
4608 refmod_state = pmap_get_refmod(m->phys_page);
4609
4610 if ((refmod_state & VM_MEM_MODIFIED))
4611 m->dirty = TRUE;
4612 } else
4613 refmod_state = 0;
4614
4615 if ( !m->dirty) {
4616 /*
4617 * page is not to be cleaned
4618 * put it back on the head of its queue
4619 */
4620 if (m->precious)
4621 hibernate_stats.hibernate_skipped_precious++;
4622
4623 goto reenter_pg_on_q;
4624 }
4625 tq = NULL;
4626
4627 if (m_object->internal) {
4628 if (VM_PAGE_Q_THROTTLED(iq))
4629 tq = iq;
4630 } else if (VM_PAGE_Q_THROTTLED(eq))
4631 tq = eq;
4632
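/*
 * the queue this page would go to is throttled... drop the object
 * lock and wait (up to 5 one-second intervals) for its laundry to
 * drain before retrying this page or giving up
 */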
4633 if (tq != NULL) {
4634 wait_result_t wait_result;
4635 int wait_count = 5;
4636
4637 if (l_object != NULL) {
4638 vm_object_unlock(l_object);
4639 l_object = NULL;
4640 }
4641 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4642
4643 tq->pgo_throttled = TRUE;
4644
4645 while (retval == 0) {
4646
4647 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4648
4649 vm_page_unlock_queues();
4650
4651 wait_result = thread_block(THREAD_CONTINUE_NULL);
4652
4653 vm_page_lock_queues();
4654
4655 if (hibernate_should_abort())
4656 retval = 1;
4657
4658 if (wait_result != THREAD_TIMED_OUT)
4659 break;
4660
4661 if (--wait_count == 0) {
4662 hibernate_stats.hibernate_throttle_timeout++;
4663 retval = 1;
4664 }
4665 }
4666 if (retval)
4667 break;
4668
4669 hibernate_stats.hibernate_throttled++;
4670
4671 continue;
4672 }
4673 VM_PAGE_QUEUES_REMOVE(m);
4674
4675 hibernate_dirty_page(m);
4676
4677 hibernate_stats.hibernate_found_dirty++;
4678
4679 goto next_pg;
4680
4681 reenter_pg_on_q:
4682 queue_remove(q, m, vm_page_t, pageq);
4683 queue_enter(q, m, vm_page_t, pageq);
4684
4685 hibernate_stats.hibernate_reentered_on_q++;
4686 next_pg:
4687 hibernate_stats.hibernate_considered++;
4688
4689 qcount--;
4690 try_failed_count = 0;
4691 }
4692 if (l_object != NULL) {
4693 vm_object_unlock(l_object);
4694 l_object = NULL;
4695 }
4696 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4697
4698 vm_page_unlock_queues();
4699
4700 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4701
4702 return (retval);
4703 }
4704
4705
4706 static int
4707 hibernate_flush_dirty_pages(void)
4708 {
4709 struct vm_speculative_age_q *aq;
4710 uint32_t i;
4711
4712 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4713
4714 if (vm_page_local_q) {
4715 for (i = 0; i < vm_page_local_q_count; i++)
4716 vm_page_reactivate_local(i, TRUE, FALSE);
4717 }
4718
4719 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4720 int qcount;
4721 vm_page_t m;
4722
4723 aq = &vm_page_queue_speculative[i];
4724
4725 if (queue_empty(&aq->age_q))
4726 continue;
4727 qcount = 0;
4728
4729 vm_page_lockspin_queues();
4730
4731 queue_iterate(&aq->age_q,
4732 m,
4733 vm_page_t,
4734 pageq)
4735 {
4736 qcount++;
4737 }
4738 vm_page_unlock_queues();
4739
4740 if (qcount) {
4741 if (hibernate_flush_queue(&aq->age_q, qcount))
4742 return (1);
4743 }
4744 }
4745 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4746 return (1);
4747 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_zf_queue_count))
4748 return (1);
4749 if (hibernate_flush_queue(&vm_page_queue_zf, vm_zf_queue_count))
4750 return (1);
4751
4752 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4753 return (1);
4754 return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4755 }
4756
4757
4758 extern void IOSleep(unsigned int);
4759 extern int sync_internal(void);
4760
4761 int
4762 hibernate_flush_memory(void)
4763 {
4764 int retval;
4765
4766 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4767
4768 IOSleep(2 * 1000);
4769
4770 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4771
4772 if ((retval = hibernate_flush_dirty_pages()) == 0) {
4773 if (consider_buffer_cache_collect != NULL) {
4774
4775 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4776
4777 sync_internal();
4778 (void)(*consider_buffer_cache_collect)(1);
4779 consider_zone_gc(1);
4780
4781 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4782 }
4783 }
4784 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4785
4786 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4787 hibernate_stats.hibernate_considered,
4788 hibernate_stats.hibernate_reentered_on_q,
4789 hibernate_stats.hibernate_found_dirty);
4790 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4791 hibernate_stats.hibernate_skipped_cleaning,
4792 hibernate_stats.hibernate_skipped_transient,
4793 hibernate_stats.hibernate_skipped_precious,
4794 hibernate_stats.hibernate_queue_nolock);
4795 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4796 hibernate_stats.hibernate_queue_paused,
4797 hibernate_stats.hibernate_throttled,
4798 hibernate_stats.hibernate_throttle_timeout,
4799 hibernate_stats.hibernate_drained,
4800 hibernate_stats.hibernate_drain_timeout);
4801
4802 return (retval);
4803 }
4804
4805
4806 static void
4807 hibernate_page_list_zero(hibernate_page_list_t *list)
4808 {
4809 uint32_t bank;
4810 hibernate_bitmap_t * bitmap;
4811
4812 bitmap = &list->bank_bitmap[0];
4813 for (bank = 0; bank < list->bank_count; bank++)
4814 {
4815 uint32_t last_bit;
4816
4817 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4818 // set the out-of-bound bits at the end of the bitmap, so pages past last_page are never treated as needing to be saved.
4819 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4820 if (last_bit)
4821 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4822
4823 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4824 }
4825 }
4826
4827 void
4828 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4829 {
4830 uint32_t i;
4831 vm_page_t m;
4832 uint64_t start, end, timeout, nsec;
4833 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4834 clock_get_uptime(&start);
4835
4836 for (i = 0; i < gobble_count; i++)
4837 {
4838 while (VM_PAGE_NULL == (m = vm_page_grab()))
4839 {
4840 clock_get_uptime(&end);
4841 if (end >= timeout)
4842 break;
4843 VM_PAGE_WAIT();
4844 }
4845 if (!m)
4846 break;
4847 m->busy = FALSE;
4848 vm_page_gobble(m);
4849
4850 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4851 hibernate_gobble_queue = m;
4852 }
4853
4854 clock_get_uptime(&end);
4855 absolutetime_to_nanoseconds(end - start, &nsec);
4856 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4857 }
4858
4859 void
4860 hibernate_free_gobble_pages(void)
4861 {
4862 vm_page_t m, next;
4863 uint32_t count = 0;
4864
4865 m = (vm_page_t) hibernate_gobble_queue;
4866 while(m)
4867 {
4868 next = (vm_page_t) m->pageq.next;
4869 vm_page_free(m);
4870 count++;
4871 m = next;
4872 }
4873 hibernate_gobble_queue = VM_PAGE_NULL;
4874
4875 if (count)
4876 HIBLOG("Freed %d pages\n", count);
4877 }
4878
4879 static boolean_t
4880 hibernate_consider_discard(vm_page_t m)
4881 {
4882 vm_object_t object = NULL;
4883 int refmod_state;
4884 boolean_t discard = FALSE;
4885
4886 do
4887 {
4888 if (m->private)
4889 panic("hibernate_consider_discard: private");
4890
4891 if (!vm_object_lock_try(m->object)) {
4892 hibernate_stats.cd_lock_failed++;
4893 break;
4894 }
4895 object = m->object;
4896
4897 if (VM_PAGE_WIRED(m)) {
4898 hibernate_stats.cd_found_wired++;
4899 break;
4900 }
4901 if (m->precious) {
4902 hibernate_stats.cd_found_precious++;
4903 break;
4904 }
4905 if (m->busy || !object->alive) {
4906 /*
4907 * Somebody is playing with this page.
4908 */
4909 hibernate_stats.cd_found_busy++;
4910 break;
4911 }
4912 if (m->absent || m->unusual || m->error) {
4913 /*
4914 * If it's unusual in any way, ignore it
4915 */
4916 hibernate_stats.cd_found_unusual++;
4917 break;
4918 }
4919 if (m->cleaning) {
4920 hibernate_stats.cd_found_cleaning++;
4921 break;
4922 }
4923 if (m->laundry || m->list_req_pending) {
4924 hibernate_stats.cd_found_laundry++;
4925 break;
4926 }
4927 if (!m->dirty)
4928 {
4929 refmod_state = pmap_get_refmod(m->phys_page);
4930
4931 if (refmod_state & VM_MEM_REFERENCED)
4932 m->reference = TRUE;
4933 if (refmod_state & VM_MEM_MODIFIED)
4934 m->dirty = TRUE;
4935 }
4936
4937 /*
4938 * If it's clean or purgeable we can discard the page on wakeup.
4939 */
4940 discard = (!m->dirty)
4941 || (VM_PURGABLE_VOLATILE == object->purgable)
4942 || (VM_PURGABLE_EMPTY == object->purgable);
4943
4944 if (discard == FALSE)
4945 hibernate_stats.cd_found_dirty++;
4946 }
4947 while (FALSE);
4948
4949 if (object)
4950 vm_object_unlock(object);
4951
4952 return (discard);
4953 }
4954
4955
4956 static void
4957 hibernate_discard_page(vm_page_t m)
4958 {
4959 if (m->absent || m->unusual || m->error)
4960 /*
4961 * If it's unusual in any way, ignore it
4962 */
4963 return;
4964
4965 if (m->pmapped == TRUE)
4966 {
4967 __unused int refmod_state = pmap_disconnect(m->phys_page);
4968 }
4969
4970 if (m->laundry)
4971 panic("hibernate_discard_page(%p) laundry", m);
4972 if (m->private)
4973 panic("hibernate_discard_page(%p) private", m);
4974 if (m->fictitious)
4975 panic("hibernate_discard_page(%p) fictitious", m);
4976
4977 if (VM_PURGABLE_VOLATILE == m->object->purgable)
4978 {
4979 /* object should be on a queue */
4980 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
4981 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
4982 assert(old_queue);
4983 /* No need to lock page queue for token delete, hibernate_vm_unlock()
4984 makes sure these locks are uncontended before sleep */
4985 vm_purgeable_token_delete_first(old_queue);
4986 m->object->purgable = VM_PURGABLE_EMPTY;
4987 }
4988
4989 vm_page_free(m);
4990 }
4991
4992 /*
4993 A zero bit in the bitmaps => the page needs to be saved. All pages default to being saved;
4994 pages known to the VM not to need saving are then subtracted.
4995 Wired pages to be saved are represented in page_list_wired, pageable ones in page_list.
4996 */
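/*
 * In both bitmaps a set bit therefore means "do not save this page via
 * this list": free and discarded pages get their bit set in page_list
 * and page_list_wired, pageable-but-kept pages are set only in
 * page_list_wired, and pages left at zero in page_list_wired are the
 * wired pages that must be preserved.
 */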
4997
4998 void
4999 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5000 hibernate_page_list_t * page_list_wired,
5001 hibernate_page_list_t * page_list_pal,
5002 uint32_t * pagesOut)
5003 {
5004 uint64_t start, end, nsec;
5005 vm_page_t m;
5006 uint32_t pages = page_list->page_count;
5007 uint32_t count_zf = 0, count_throttled = 0;
5008 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
5009 uint32_t count_wire = pages;
5010 uint32_t count_discard_active = 0;
5011 uint32_t count_discard_inactive = 0;
5012 uint32_t count_discard_purgeable = 0;
5013 uint32_t count_discard_speculative = 0;
5014 uint32_t i;
5015 uint32_t bank;
5016 hibernate_bitmap_t * bitmap;
5017 hibernate_bitmap_t * bitmap_wired;
5018
5019
5020 HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
5021
5022 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5023
5024 clock_get_uptime(&start);
5025
5026 hibernate_page_list_zero(page_list);
5027 hibernate_page_list_zero(page_list_wired);
5028 hibernate_page_list_zero(page_list_pal);
5029
5030 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5031 hibernate_stats.cd_pages = pages;
5032
5033 if (vm_page_local_q) {
5034 for (i = 0; i < vm_page_local_q_count; i++)
5035 vm_page_reactivate_local(i, TRUE, TRUE);
5036 }
5037
5038 m = (vm_page_t) hibernate_gobble_queue;
5039 while(m)
5040 {
5041 pages--;
5042 count_wire--;
5043 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5044 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5045 m = (vm_page_t) m->pageq.next;
5046 }
5047
5048 for( i = 0; i < real_ncpus; i++ )
5049 {
5050 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5051 {
5052 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5053 {
5054 pages--;
5055 count_wire--;
5056 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5057 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5058
5059 hibernate_stats.cd_local_free++;
5060 hibernate_stats.cd_total_free++;
5061 }
5062 }
5063 }
5064
5065 for( i = 0; i < vm_colors; i++ )
5066 {
5067 queue_iterate(&vm_page_queue_free[i],
5068 m,
5069 vm_page_t,
5070 pageq)
5071 {
5072 pages--;
5073 count_wire--;
5074 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5075 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5076
5077 hibernate_stats.cd_total_free++;
5078 }
5079 }
5080
5081 queue_iterate(&vm_lopage_queue_free,
5082 m,
5083 vm_page_t,
5084 pageq)
5085 {
5086 pages--;
5087 count_wire--;
5088 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5089 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5090
5091 hibernate_stats.cd_total_free++;
5092 }
5093
5094 queue_iterate( &vm_page_queue_throttled,
5095 m,
5096 vm_page_t,
5097 pageq )
5098 {
5099 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5100 && hibernate_consider_discard(m))
5101 {
5102 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5103 count_discard_inactive++;
5104 }
5105 else
5106 count_throttled++;
5107 count_wire--;
5108 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5109 }
5110
5111 queue_iterate( &vm_page_queue_zf,
5112 m,
5113 vm_page_t,
5114 pageq )
5115 {
5116 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5117 && hibernate_consider_discard(m))
5118 {
5119 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5120 if (m->dirty)
5121 count_discard_purgeable++;
5122 else
5123 count_discard_inactive++;
5124 }
5125 else
5126 count_zf++;
5127 count_wire--;
5128 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5129 }
5130
5131 queue_iterate( &vm_page_queue_inactive,
5132 m,
5133 vm_page_t,
5134 pageq )
5135 {
5136 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5137 && hibernate_consider_discard(m))
5138 {
5139 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5140 if (m->dirty)
5141 count_discard_purgeable++;
5142 else
5143 count_discard_inactive++;
5144 }
5145 else
5146 count_inactive++;
5147 count_wire--;
5148 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5149 }
5150
5151 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5152 {
5153 queue_iterate(&vm_page_queue_speculative[i].age_q,
5154 m,
5155 vm_page_t,
5156 pageq)
5157 {
5158 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5159 && hibernate_consider_discard(m))
5160 {
5161 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5162 count_discard_speculative++;
5163 }
5164 else
5165 count_speculative++;
5166 count_wire--;
5167 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5168 }
5169 }
5170
5171 queue_iterate( &vm_page_queue_active,
5172 m,
5173 vm_page_t,
5174 pageq )
5175 {
5176 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5177 && hibernate_consider_discard(m))
5178 {
5179 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5180 if (m->dirty)
5181 count_discard_purgeable++;
5182 else
5183 count_discard_active++;
5184 }
5185 else
5186 count_active++;
5187 count_wire--;
5188 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5189 }
5190
5191 // pull wired from hibernate_bitmap
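// a page whose bit is still zero in page_list_wired is one that must be
// saved as wired; setting its bit in page_list here removes it from the
// pageable image so it is only written as part of the wired set.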
5192
5193 bitmap = &page_list->bank_bitmap[0];
5194 bitmap_wired = &page_list_wired->bank_bitmap[0];
5195 for (bank = 0; bank < page_list->bank_count; bank++)
5196 {
5197 for (i = 0; i < bitmap->bitmapwords; i++)
5198 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5199 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5200 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5201 }
5202
5203 // machine dependent adjustments
5204 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
5205
5206 hibernate_stats.cd_count_wire = count_wire;
5207 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative;
5208
5209 clock_get_uptime(&end);
5210 absolutetime_to_nanoseconds(end - start, &nsec);
5211 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5212
5213 HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
5214 pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
5215 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5216
5217 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
5218
5219 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5220 }
5221
5222 void
5223 hibernate_page_list_discard(hibernate_page_list_t * page_list)
5224 {
5225 uint64_t start, end, nsec;
5226 vm_page_t m;
5227 vm_page_t next;
5228 uint32_t i;
5229 uint32_t count_discard_active = 0;
5230 uint32_t count_discard_inactive = 0;
5231 uint32_t count_discard_purgeable = 0;
5232 uint32_t count_discard_speculative = 0;
5233
5234 clock_get_uptime(&start);
5235
5236 m = (vm_page_t) queue_first(&vm_page_queue_zf);
5237 while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
5238 {
5239 next = (vm_page_t) m->pageq.next;
5240 if (hibernate_page_bittst(page_list, m->phys_page))
5241 {
5242 if (m->dirty)
5243 count_discard_purgeable++;
5244 else
5245 count_discard_inactive++;
5246 hibernate_discard_page(m);
5247 }
5248 m = next;
5249 }
5250
5251 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5252 {
5253 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5254 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5255 {
5256 next = (vm_page_t) m->pageq.next;
5257 if (hibernate_page_bittst(page_list, m->phys_page))
5258 {
5259 count_discard_speculative++;
5260 hibernate_discard_page(m);
5261 }
5262 m = next;
5263 }
5264 }
5265
5266 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5267 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5268 {
5269 next = (vm_page_t) m->pageq.next;
5270 if (hibernate_page_bittst(page_list, m->phys_page))
5271 {
5272 if (m->dirty)
5273 count_discard_purgeable++;
5274 else
5275 count_discard_inactive++;
5276 hibernate_discard_page(m);
5277 }
5278 m = next;
5279 }
5280
5281 m = (vm_page_t) queue_first(&vm_page_queue_active);
5282 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5283 {
5284 next = (vm_page_t) m->pageq.next;
5285 if (hibernate_page_bittst(page_list, m->phys_page))
5286 {
5287 if (m->dirty)
5288 count_discard_purgeable++;
5289 else
5290 count_discard_active++;
5291 hibernate_discard_page(m);
5292 }
5293 m = next;
5294 }
5295
5296 clock_get_uptime(&end);
5297 absolutetime_to_nanoseconds(end - start, &nsec);
5298 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
5299 nsec / 1000000ULL,
5300 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5301 }
5302
5303 #endif /* HIBERNATION */
5304
5305 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5306
5307 #include <mach_vm_debug.h>
5308 #if MACH_VM_DEBUG
5309
5310 #include <mach_debug/hash_info.h>
5311 #include <vm/vm_debug.h>
5312
5313 /*
5314 * Routine: vm_page_info
5315 * Purpose:
5316 * Return information about the global VP table.
5317 * Fills the buffer with as much information as possible
5318 * and returns the desired size of the buffer.
5319 * Conditions:
5320 * Nothing locked. The caller should provide
5321 * possibly-pageable memory.
5322 */
5323
5324 unsigned int
5325 vm_page_info(
5326 hash_info_bucket_t *info,
5327 unsigned int count)
5328 {
5329 unsigned int i;
5330 lck_spin_t *bucket_lock;
5331
5332 if (vm_page_bucket_count < count)
5333 count = vm_page_bucket_count;
5334
5335 for (i = 0; i < count; i++) {
5336 vm_page_bucket_t *bucket = &vm_page_buckets[i];
5337 unsigned int bucket_count = 0;
5338 vm_page_t m;
5339
5340 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5341 lck_spin_lock(bucket_lock);
5342
5343 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5344 bucket_count++;
5345
5346 lck_spin_unlock(bucket_lock);
5347
5348 /* don't touch pageable memory while holding locks */
5349 info[i].hib_count = bucket_count;
5350 }
5351
5352 return vm_page_bucket_count;
5353 }
5354 #endif /* MACH_VM_DEBUG */
5355
5356 #include <mach_kdb.h>
5357 #if MACH_KDB
5358
5359 #include <ddb/db_output.h>
5360 #include <vm/vm_print.h>
5361 #define printf kdbprintf
5362
5363 /*
5364 * Routine: vm_page_print [exported]
5365 */
5366 void
5367 vm_page_print(
5368 db_addr_t db_addr)
5369 {
5370 vm_page_t p;
5371
5372 p = (vm_page_t) (long) db_addr;
5373
5374 iprintf("page 0x%x\n", p);
5375
5376 db_indent += 2;
5377
5378 iprintf("object=0x%x", p->object);
5379 printf(", offset=0x%x", p->offset);
5380 printf(", wire_count=%d", p->wire_count);
5381
5382 iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
5383 (p->local ? "" : "!"),
5384 (p->inactive ? "" : "!"),
5385 (p->active ? "" : "!"),
5386 (p->throttled ? "" : "!"),
5387 (p->gobbled ? "" : "!"),
5388 (p->laundry ? "" : "!"),
5389 (p->free ? "" : "!"),
5390 (p->reference ? "" : "!"),
5391 (p->encrypted ? "" : "!"));
5392 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
5393 (p->busy ? "" : "!"),
5394 (p->wanted ? "" : "!"),
5395 (p->tabled ? "" : "!"),
5396 (p->fictitious ? "" : "!"),
5397 (p->private ? "" : "!"),
5398 (p->precious ? "" : "!"));
5399 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
5400 (p->absent ? "" : "!"),
5401 (p->error ? "" : "!"),
5402 (p->dirty ? "" : "!"),
5403 (p->cleaning ? "" : "!"),
5404 (p->pageout ? "" : "!"),
5405 (p->clustered ? "" : "!"));
5406 iprintf("%soverwriting, %srestart, %sunusual\n",
5407 (p->overwriting ? "" : "!"),
5408 (p->restart ? "" : "!"),
5409 (p->unusual ? "" : "!"));
5410
5411 iprintf("phys_page=0x%x", p->phys_page);
5412
5413 db_indent -= 2;
5414 }
5415 #endif /* MACH_KDB */