1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <pexpert/pexpert.h>
89
90 #include <vm/vm_protos.h>
91 #include <vm/memory_object.h>
92 #include <vm/vm_purgeable_internal.h>
93
94 #include <IOKit/IOHibernatePrivate.h>
95
96
97 #include <sys/kern_memorystatus.h>
98
99 #include <sys/kdebug.h>
100
101 boolean_t vm_page_free_verify = TRUE;
102
103 uint32_t vm_lopage_free_count = 0;
104 uint32_t vm_lopage_free_limit = 0;
105 uint32_t vm_lopage_lowater = 0;
106 boolean_t vm_lopage_refill = FALSE;
107 boolean_t vm_lopage_needed = FALSE;
108
109 lck_mtx_ext_t vm_page_queue_lock_ext;
110 lck_mtx_ext_t vm_page_queue_free_lock_ext;
111 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
112
113 int speculative_age_index = 0;
114 int speculative_steal_index = 0;
115 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
116
117
118 __private_extern__ void vm_page_init_lck_grp(void);
119
120 static void vm_page_free_prepare(vm_page_t page);
121 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
122
123
124
125
126 /*
127 * Associated with each page of user-allocatable memory is a
128 * page structure.
129 */
130
131 /*
132 * These variables record the values returned by vm_page_bootstrap,
133 * for debugging purposes. The implementation of pmap_steal_memory
134 * and pmap_startup here also uses them internally.
135 */
136
137 vm_offset_t virtual_space_start;
138 vm_offset_t virtual_space_end;
139 uint32_t vm_page_pages;
140
141 /*
142 * The vm_page_lookup() routine, which provides for fast
143 * (virtual memory object, offset) to page lookup, employs
144 * the following hash table. The vm_page_{insert,remove}
145 * routines install and remove associations in the table.
146 * [This table is often called the virtual-to-physical,
147 * or VP, table.]
148 */
149 typedef struct {
150 vm_page_t pages;
151 #if MACH_PAGE_HASH_STATS
152 int cur_count; /* current count */
153 int hi_count; /* high water mark */
154 #endif /* MACH_PAGE_HASH_STATS */
155 } vm_page_bucket_t;
156
157
158 #define BUCKETS_PER_LOCK 16
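/*
 * Illustrative note on the lock sharing below: bucket i is guarded by
 * vm_page_bucket_locks[i / BUCKETS_PER_LOCK], so with 16 buckets per
 * lock, buckets 0..15 share lock 0, buckets 16..31 share lock 1, and
 * so on.
 */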
159
160 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
161 unsigned int vm_page_bucket_count = 0; /* How big is array? */
162 unsigned int vm_page_hash_mask; /* Mask for hash function */
163 unsigned int vm_page_hash_shift; /* Shift for hash function */
164 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
165 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
166
167 lck_spin_t *vm_page_bucket_locks;
168
169
170 #if MACH_PAGE_HASH_STATS
171 /* This routine is only for debug. It is intended to be called by
172 * hand by a developer using a kernel debugger. This routine prints
173 * out vm_page_hash table statistics to the kernel debug console.
174 */
175 void
176 hash_debug(void)
177 {
178 int i;
179 int numbuckets = 0;
180 int highsum = 0;
181 int maxdepth = 0;
182
183 for (i = 0; i < vm_page_bucket_count; i++) {
184 if (vm_page_buckets[i].hi_count) {
185 numbuckets++;
186 highsum += vm_page_buckets[i].hi_count;
187 if (vm_page_buckets[i].hi_count > maxdepth)
188 maxdepth = vm_page_buckets[i].hi_count;
189 }
190 }
191 printf("Total number of buckets: %d\n", vm_page_bucket_count);
192 printf("Number used buckets: %d = %d%%\n",
193 numbuckets, 100*numbuckets/vm_page_bucket_count);
194 printf("Number unused buckets: %d = %d%%\n",
195 vm_page_bucket_count - numbuckets,
196 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
197 printf("Sum of bucket max depth: %d\n", highsum);
198 printf("Average bucket depth: %d.%2d\n",
199 highsum/vm_page_bucket_count,
200 highsum%vm_page_bucket_count);
201 printf("Maximum bucket depth: %d\n", maxdepth);
202 }
203 #endif /* MACH_PAGE_HASH_STATS */
204
205 /*
206 * The virtual page size is currently implemented as a runtime
207 * variable, but is constant once initialized using vm_set_page_size.
208 * This initialization must be done in the machine-dependent
209 * bootstrap sequence, before calling other machine-independent
210 * initializations.
211 *
212 * All references to the virtual page size outside this
213 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
214 * constants.
215 */
216 vm_size_t page_size = PAGE_SIZE;
217 vm_size_t page_mask = PAGE_MASK;
218 int page_shift = PAGE_SHIFT;
219
220 /*
221 * Resident page structures are initialized from
222 * a template (see vm_page_alloc).
223 *
224 * When adding a new field to the virtual memory
225 * object structure, be sure to add initialization
226 * (see vm_page_bootstrap).
227 */
228 struct vm_page vm_page_template;
229
230 vm_page_t vm_pages = VM_PAGE_NULL;
231 unsigned int vm_pages_count = 0;
232 ppnum_t vm_page_lowest = 0;
233
234 /*
235 * Resident pages that represent real memory
236 * are allocated from a set of free lists,
237 * one per color.
238 */
239 unsigned int vm_colors;
240 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
241 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
242 queue_head_t vm_page_queue_free[MAX_COLORS];
243 unsigned int vm_page_free_wanted;
244 unsigned int vm_page_free_wanted_privileged;
245 unsigned int vm_page_free_count;
246 unsigned int vm_page_fictitious_count;
247
248 unsigned int vm_page_free_count_minimum; /* debugging */
249
250 /*
251 * Occasionally, the virtual memory system uses
252 * resident page structures that do not refer to
253 * real pages, for example to leave a page with
254 * important state information in the VP table.
255 *
256 * These page structures are allocated the way
257 * most other kernel structures are.
258 */
259 zone_t vm_page_zone;
260 vm_locks_array_t vm_page_locks;
261 decl_lck_mtx_data(,vm_page_alloc_lock)
262 unsigned int io_throttle_zero_fill;
263
264 unsigned int vm_page_local_q_count = 0;
265 unsigned int vm_page_local_q_soft_limit = 250;
266 unsigned int vm_page_local_q_hard_limit = 500;
267 struct vplq *vm_page_local_q = NULL;
268
269 /*
270 * Fictitious pages don't have a physical address,
271 * but we must initialize phys_page to something.
272 * For debugging, this should be a strange value
273 * that the pmap module can recognize in assertions.
274 */
275 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
276
277 /*
278 * Guard pages are not accessible so they don't
279 * need a physical address, but we need to enter
280 * one in the pmap.
281 * Let's make it recognizable and make sure that
282 * we don't use a real physical page with that
283 * physical address.
284 */
285 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
286
287 /*
288 * Resident page structures are also chained on
289 * queues that are used by the page replacement
290 * system (pageout daemon). These queues are
291 * defined here, but are shared by the pageout
292 * module. The inactive queue is broken into
293 * inactive and zf for convenience as the
294 * pageout daemon often assigns a higher
295 * affinity to zf pages.
296 */
297 queue_head_t vm_page_queue_active;
298 queue_head_t vm_page_queue_inactive;
299 queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
300 queue_head_t vm_page_queue_throttled;
301
302 unsigned int vm_page_active_count;
303 unsigned int vm_page_inactive_count;
304 unsigned int vm_page_throttled_count;
305 unsigned int vm_page_speculative_count;
306 unsigned int vm_page_wire_count;
307 unsigned int vm_page_wire_count_initial;
308 unsigned int vm_page_gobble_count = 0;
309 unsigned int vm_page_wire_count_warning = 0;
310 unsigned int vm_page_gobble_count_warning = 0;
311
312 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
313 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
314 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
315
316 #if DEVELOPMENT || DEBUG
317 unsigned int vm_page_speculative_recreated = 0;
318 unsigned int vm_page_speculative_created = 0;
319 unsigned int vm_page_speculative_used = 0;
320 #endif
321
322 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
323 ppnum_t max_valid_low_ppnum = 0xffffffff;
324
325
326 /*
327 * Several page replacement parameters are also
328 * shared with this module, so that page allocation
329 * (done here in vm_page_alloc) can trigger the
330 * pageout daemon.
331 */
332 unsigned int vm_page_free_target = 0;
333 unsigned int vm_page_free_min = 0;
334 unsigned int vm_page_throttle_limit = 0;
335 uint32_t vm_page_creation_throttle = 0;
336 unsigned int vm_page_inactive_target = 0;
337 unsigned int vm_page_inactive_min = 0;
338 unsigned int vm_page_free_reserved = 0;
339 unsigned int vm_page_throttle_count = 0;
340
341 /*
342 * The VM system has a couple of heuristics for deciding
343 * that pages are "uninteresting" and should be placed
344 * on the inactive queue as likely candidates for replacement.
345 * These variables let the heuristics be controlled at run-time
346 * to make experimentation easier.
347 */
348
349 boolean_t vm_page_deactivate_hint = TRUE;
350
351 struct vm_page_stats_reusable vm_page_stats_reusable;
352
353 /*
354 * vm_set_page_size:
355 *
356 * Sets the page size, perhaps based upon the memory
357 * size. Must be called before any use of page-size
358 * dependent functions.
359 *
360 * Sets page_shift and page_mask from page_size.
361 */
362 void
363 vm_set_page_size(void)
364 {
365 page_mask = page_size - 1;
366
367 if ((page_mask & page_size) != 0)
368 panic("vm_set_page_size: page size not a power of two");
369
370 for (page_shift = 0; ; page_shift++)
371 if ((1U << page_shift) == page_size)
372 break;
373 }
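/*
 * Worked example (assuming the common 4 KB page size): with
 * page_size == 4096, page_mask becomes 0xFFF and the loop above
 * stops at page_shift == 12, since (1U << 12) == 4096.
 */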
374
375
376 /* Called once during startup, once the cache geometry is known.
377 */
378 static void
379 vm_page_set_colors( void )
380 {
381 unsigned int n, override;
382
383 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
384 n = override;
385 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
386 n = vm_cache_geometry_colors;
387 else n = DEFAULT_COLORS; /* use default if all else fails */
388
389 if ( n == 0 )
390 n = 1;
391 if ( n > MAX_COLORS )
392 n = MAX_COLORS;
393
394 /* the count must be a power of 2 */
395 if ( ( n & (n - 1)) != 0 )
396 panic("vm_page_set_colors");
397
398 vm_colors = n;
399 vm_color_mask = n - 1;
400 }
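/*
 * Example, with purely illustrative numbers: if the cache geometry
 * yields 32 colors, vm_color_mask is 0x1F and a page is binned by the
 * low bits of its physical page number, e.g. phys_page 0x12345 lands
 * on free queue (0x12345 & 0x1F) == 5.  The boot-arg or the detected
 * cache geometry decides the real color count.
 */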
401
402
403 lck_grp_t vm_page_lck_grp_free;
404 lck_grp_t vm_page_lck_grp_queue;
405 lck_grp_t vm_page_lck_grp_local;
406 lck_grp_t vm_page_lck_grp_purge;
407 lck_grp_t vm_page_lck_grp_alloc;
408 lck_grp_t vm_page_lck_grp_bucket;
409 lck_grp_attr_t vm_page_lck_grp_attr;
410 lck_attr_t vm_page_lck_attr;
411
412
413 __private_extern__ void
414 vm_page_init_lck_grp(void)
415 {
416 /*
417 * initialize the vm_page lock world
418 */
419 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
420 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
421 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
422 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
423 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
424 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
425 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
426 lck_attr_setdefault(&vm_page_lck_attr);
427 }
428
429 void
430 vm_page_init_local_q()
431 {
432 unsigned int num_cpus;
433 unsigned int i;
434 struct vplq *t_local_q;
435
436 num_cpus = ml_get_max_cpus();
437
438 /*
439 * no point in this for a uni-processor system
440 */
441 if (num_cpus >= 2) {
442 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
443
444 for (i = 0; i < num_cpus; i++) {
445 struct vpl *lq;
446
447 lq = &t_local_q[i].vpl_un.vpl;
448 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
449 queue_init(&lq->vpl_queue);
450 lq->vpl_count = 0;
451 }
452 vm_page_local_q_count = num_cpus;
453
454 vm_page_local_q = (struct vplq *)t_local_q;
455 }
456 }
457
458
459 /*
460 * vm_page_bootstrap:
461 *
462 * Initializes the resident memory module.
463 *
464 * Allocates memory for the page cells, and
465 * for the object/offset-to-page hash table headers.
466 * Each page cell is initialized and placed on the free list.
467 * Returns the range of available kernel virtual memory.
468 */
469
470 void
471 vm_page_bootstrap(
472 vm_offset_t *startp,
473 vm_offset_t *endp)
474 {
475 register vm_page_t m;
476 unsigned int i;
477 unsigned int log1;
478 unsigned int log2;
479 unsigned int size;
480
481 /*
482 * Initialize the vm_page template.
483 */
484
485 m = &vm_page_template;
486 bzero(m, sizeof (*m));
487
488 m->pageq.next = NULL;
489 m->pageq.prev = NULL;
490 m->listq.next = NULL;
491 m->listq.prev = NULL;
492 m->next = VM_PAGE_NULL;
493
494 m->object = VM_OBJECT_NULL; /* reset later */
495 m->offset = (vm_object_offset_t) -1; /* reset later */
496
497 m->wire_count = 0;
498 m->local = FALSE;
499 m->inactive = FALSE;
500 m->active = FALSE;
501 m->pageout_queue = FALSE;
502 m->speculative = FALSE;
503 m->laundry = FALSE;
504 m->free = FALSE;
505 m->reference = FALSE;
506 m->gobbled = FALSE;
507 m->private = FALSE;
508 m->throttled = FALSE;
509 m->__unused_pageq_bits = 0;
510
511 m->phys_page = 0; /* reset later */
512
513 m->busy = TRUE;
514 m->wanted = FALSE;
515 m->tabled = FALSE;
516 m->fictitious = FALSE;
517 m->pmapped = FALSE;
518 m->wpmapped = FALSE;
519 m->pageout = FALSE;
520 m->absent = FALSE;
521 m->error = FALSE;
522 m->dirty = FALSE;
523 m->cleaning = FALSE;
524 m->precious = FALSE;
525 m->clustered = FALSE;
526 m->overwriting = FALSE;
527 m->restart = FALSE;
528 m->unusual = FALSE;
529 m->encrypted = FALSE;
530 m->encrypted_cleaning = FALSE;
531 m->list_req_pending = FALSE;
532 m->dump_cleaning = FALSE;
533 m->cs_validated = FALSE;
534 m->cs_tainted = FALSE;
535 m->no_cache = FALSE;
536 m->zero_fill = FALSE;
537 m->reusable = FALSE;
538 m->slid = FALSE;
539 m->__unused_object_bits = 0;
540
541
542 /*
543 * Initialize the page queues.
544 */
545 vm_page_init_lck_grp();
546
547 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
548 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
549 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
550
551 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
552 int group;
553
554 purgeable_queues[i].token_q_head = 0;
555 purgeable_queues[i].token_q_tail = 0;
556 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
557 queue_init(&purgeable_queues[i].objq[group]);
558
559 purgeable_queues[i].type = i;
560 purgeable_queues[i].new_pages = 0;
561 #if MACH_ASSERT
562 purgeable_queues[i].debug_count_tokens = 0;
563 purgeable_queues[i].debug_count_objects = 0;
564 #endif
565 };
566
567 for (i = 0; i < MAX_COLORS; i++ )
568 queue_init(&vm_page_queue_free[i]);
569
570 queue_init(&vm_lopage_queue_free);
571 queue_init(&vm_page_queue_active);
572 queue_init(&vm_page_queue_inactive);
573 queue_init(&vm_page_queue_throttled);
574 queue_init(&vm_page_queue_zf);
575
576 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
577 queue_init(&vm_page_queue_speculative[i].age_q);
578
579 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
580 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
581 }
582 vm_page_free_wanted = 0;
583 vm_page_free_wanted_privileged = 0;
584
585 vm_page_set_colors();
586
587
588 /*
589 * Steal memory for the map and zone subsystems.
590 */
591
592 vm_map_steal_memory();
593 zone_steal_memory();
594
595 /*
596 * Allocate (and initialize) the virtual-to-physical
597 * table hash buckets.
598 *
599 * The number of buckets should be a power of two to
600 * get a good hash function. The following computation
601 * chooses the first power of two that is greater than
602 * or equal to the number of physical pages in the system.
603 */
604
605 if (vm_page_bucket_count == 0) {
606 unsigned int npages = pmap_free_pages();
607
608 vm_page_bucket_count = 1;
609 while (vm_page_bucket_count < npages)
610 vm_page_bucket_count <<= 1;
611 }
612 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
613
614 vm_page_hash_mask = vm_page_bucket_count - 1;
615
616 /*
617 * Calculate object shift value for hashing algorithm:
618 * O = log2(sizeof(struct vm_object))
619 * B = log2(vm_page_bucket_count)
620 * hash shifts the object left by
621 * B/2 - O + 1
622 */
623 size = vm_page_bucket_count;
624 for (log1 = 0; size > 1; log1++)
625 size /= 2;
626 size = sizeof(struct vm_object);
627 for (log2 = 0; size > 1; log2++)
628 size /= 2;
629 vm_page_hash_shift = log1/2 - log2 + 1;
630
631 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
632 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
633 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
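	/*
	 * Worked example with hypothetical sizes: with 1 << 20 buckets,
	 * log1 == 20, and if sizeof(struct vm_object) were 128 bytes,
	 * log2 == 7, giving vm_page_hash_shift = 20/2 - 7 + 1 = 4.
	 * The bucket hash seed becomes (1 << 10) | (1 << 5) | 1 == 0x421.
	 */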
634
635 if (vm_page_hash_mask & vm_page_bucket_count)
636 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
637
638 vm_page_buckets = (vm_page_bucket_t *)
639 pmap_steal_memory(vm_page_bucket_count *
640 sizeof(vm_page_bucket_t));
641
642 vm_page_bucket_locks = (lck_spin_t *)
643 pmap_steal_memory(vm_page_bucket_lock_count *
644 sizeof(lck_spin_t));
645
646 for (i = 0; i < vm_page_bucket_count; i++) {
647 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
648
649 bucket->pages = VM_PAGE_NULL;
650 #if MACH_PAGE_HASH_STATS
651 bucket->cur_count = 0;
652 bucket->hi_count = 0;
653 #endif /* MACH_PAGE_HASH_STATS */
654 }
655
656 for (i = 0; i < vm_page_bucket_lock_count; i++)
657 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
658
659 /*
660 * Machine-dependent code allocates the resident page table.
661 * It uses vm_page_init to initialize the page frames.
662 * The code also returns to us the virtual space available
663 * to the kernel. We don't trust the pmap module
664 * to get the alignment right.
665 */
666
667 pmap_startup(&virtual_space_start, &virtual_space_end);
668 virtual_space_start = round_page(virtual_space_start);
669 virtual_space_end = trunc_page(virtual_space_end);
670
671 *startp = virtual_space_start;
672 *endp = virtual_space_end;
673
674 /*
675 * Compute the initial "wire" count.
676 * Up until now, the pages which have been set aside are not under
677 * the VM system's control, so although they aren't explicitly
678 * wired, they nonetheless can't be moved. At this moment,
679 * all VM managed pages are "free", courtesy of pmap_startup.
680 */
681 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
682 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
683 vm_page_wire_count_initial = vm_page_wire_count;
684 vm_page_free_count_minimum = vm_page_free_count;
685
686 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
687 vm_page_free_count, vm_page_wire_count);
688
689 simple_lock_init(&vm_paging_lock, 0);
690 }
691
692 #ifndef MACHINE_PAGES
693 /*
694 * We implement pmap_steal_memory and pmap_startup with the help
695 * of two simpler functions, pmap_virtual_space and pmap_next_page.
696 */
697
698 void *
699 pmap_steal_memory(
700 vm_size_t size)
701 {
702 vm_offset_t addr, vaddr;
703 ppnum_t phys_page;
704
705 /*
706 * Round the requested size up to a multiple of the pointer size.
707 */
708
709 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
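	/*
	 * e.g. on a 64-bit kernel (sizeof (void *) == 8) a request for
	 * 13 bytes is rounded up to (13 + 7) & ~7 == 16 bytes.
	 */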
710
711 /*
712 * If this is the first call to pmap_steal_memory,
713 * we have to initialize ourselves.
714 */
715
716 if (virtual_space_start == virtual_space_end) {
717 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
718
719 /*
720 * The initial values must be aligned properly, and
721 * we don't trust the pmap module to do it right.
722 */
723
724 virtual_space_start = round_page(virtual_space_start);
725 virtual_space_end = trunc_page(virtual_space_end);
726 }
727
728 /*
729 * Allocate virtual memory for this request.
730 */
731
732 addr = virtual_space_start;
733 virtual_space_start += size;
734
735 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
736
737 /*
738 * Allocate and map physical pages to back new virtual pages.
739 */
740
741 for (vaddr = round_page(addr);
742 vaddr < addr + size;
743 vaddr += PAGE_SIZE) {
744
745 if (!pmap_next_page_hi(&phys_page))
746 panic("pmap_steal_memory");
747
748 /*
749 * XXX Logically, these mappings should be wired,
750 * but some pmap modules barf if they are.
751 */
752 #if defined(__LP64__)
753 pmap_pre_expand(kernel_pmap, vaddr);
754 #endif
755
756 pmap_enter(kernel_pmap, vaddr, phys_page,
757 VM_PROT_READ|VM_PROT_WRITE,
758 VM_WIMG_USE_DEFAULT, FALSE);
759 /*
760 * Account for newly stolen memory
761 */
762 vm_page_wire_count++;
763
764 }
765
766 return (void *) addr;
767 }
768
769 void
770 pmap_startup(
771 vm_offset_t *startp,
772 vm_offset_t *endp)
773 {
774 unsigned int i, npages, pages_initialized, fill, fillval;
775 ppnum_t phys_page;
776 addr64_t tmpaddr;
777
778 /*
779 * We calculate how many page frames we will have
780 * and then allocate the page structures in one chunk.
781 */
782
783 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
784 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
785 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* How many pages can we manage, counting the space their vm_page_ts consume */
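	/*
	 * Each managed page therefore costs PAGE_SIZE bytes of memory plus
	 * one struct vm_page out of the stolen array; e.g. with 4 KB pages
	 * and a hypothetical 80-byte struct vm_page, roughly
	 * tmpaddr / 4176 page frames can be described.
	 */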
786
787 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
788
789 /*
790 * Initialize the page frames.
791 */
792 for (i = 0, pages_initialized = 0; i < npages; i++) {
793 if (!pmap_next_page(&phys_page))
794 break;
795 if (pages_initialized == 0 || phys_page < vm_page_lowest)
796 vm_page_lowest = phys_page;
797
798 vm_page_init(&vm_pages[i], phys_page, FALSE);
799 vm_page_pages++;
800 pages_initialized++;
801 }
802 vm_pages_count = pages_initialized;
803
804 /*
805 * Check if we want to initialize pages to a known value
806 */
807 fill = 0; /* Assume no fill */
808 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
809
810 // -debug code remove
811 if (2 == vm_himemory_mode) {
812 // free low -> high so high is preferred
813 for (i = 1; i <= pages_initialized; i++) {
814 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
815 vm_page_release(&vm_pages[i - 1]);
816 }
817 }
818 else
819 // debug code remove-
820
821 /*
822 * Release pages in reverse order so that physical pages
823 * initially get allocated in ascending addresses. This keeps
824 * the devices (which must address physical memory) happy if
825 * they require several consecutive pages.
826 */
827 for (i = pages_initialized; i > 0; i--) {
828 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
829 vm_page_release(&vm_pages[i - 1]);
830 }
831
832 #if 0
833 {
834 vm_page_t xx, xxo, xxl;
835 int i, j, k, l;
836
837 j = 0; /* (BRINGUP) */
838 xxl = 0;
839
840 for( i = 0; i < vm_colors; i++ ) {
841 queue_iterate(&vm_page_queue_free[i],
842 xx,
843 vm_page_t,
844 pageq) { /* BRINGUP */
845 j++; /* (BRINGUP) */
846 if(j > vm_page_free_count) { /* (BRINGUP) */
847 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
848 }
849
850 l = vm_page_free_count - j; /* (BRINGUP) */
851 k = 0; /* (BRINGUP) */
852
853 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
854
855 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
856 k++;
857 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
858 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
859 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
860 }
861 }
862
863 xxl = xx;
864 }
865 }
866
867 if(j != vm_page_free_count) { /* (BRINGUP) */
868 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
869 }
870 }
871 #endif
872
873
874 /*
875 * We have to re-align virtual_space_start,
876 * because pmap_steal_memory has been using it.
877 */
878
879 virtual_space_start = round_page(virtual_space_start);
880
881 *startp = virtual_space_start;
882 *endp = virtual_space_end;
883 }
884 #endif /* MACHINE_PAGES */
885
886 /*
887 * Routine: vm_page_module_init
888 * Purpose:
889 * Second initialization pass, to be done after
890 * the basic VM system is ready.
891 */
892 void
893 vm_page_module_init(void)
894 {
895 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
896 0, PAGE_SIZE, "vm pages");
897
898 #if ZONE_DEBUG
899 zone_debug_disable(vm_page_zone);
900 #endif /* ZONE_DEBUG */
901
902 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
903 zone_change(vm_page_zone, Z_EXPAND, FALSE);
904 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
905 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
906
907 /*
908 * Adjust zone statistics to account for the real pages allocated
909 * in vm_page_create(). [Q: is this really what we want?]
910 */
911 vm_page_zone->count += vm_page_pages;
912 vm_page_zone->sum_count += vm_page_pages;
913 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
914
915 lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
916 }
917
918 /*
919 * Routine: vm_page_create
920 * Purpose:
921 * After the VM system is up, machine-dependent code
922 * may stumble across more physical memory, for example
923 * memory that it was reserving for a frame buffer.
924 * vm_page_create turns this memory into available pages.
925 */
926
927 void
928 vm_page_create(
929 ppnum_t start,
930 ppnum_t end)
931 {
932 ppnum_t phys_page;
933 vm_page_t m;
934
935 for (phys_page = start;
936 phys_page < end;
937 phys_page++) {
938 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
939 == VM_PAGE_NULL)
940 vm_page_more_fictitious();
941
942 m->fictitious = FALSE;
943 pmap_clear_noencrypt(phys_page);
944
945 vm_page_pages++;
946 vm_page_release(m);
947 }
948 }
949
950 /*
951 * vm_page_hash:
952 *
953 * Distributes the object/offset key pair among hash buckets.
954 *
955 * NOTE: The bucket count must be a power of 2
956 */
957 #define vm_page_hash(object, offset) (\
958 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
959 & vm_page_hash_mask)
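/*
 * Sketch of the distribution, with hypothetical values: if
 * vm_page_bucket_count were 1024 (vm_page_hash_mask == 0x3FF), the
 * bucket index for (object, offset) is
 *
 *   (((uintptr_t)object * vm_page_bucket_hash) +
 *    ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash)) & 0x3FF
 *
 * i.e. a multiplicative hash of the object pointer, perturbed by the
 * page index of the offset, reduced modulo the power-of-two bucket
 * count.
 */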
960
961
962 /*
963 * vm_page_insert: [ internal use only ]
964 *
965 * Inserts the given mem entry into the object/offset-page
966 * table and object list.
967 *
968 * The object must be locked.
969 */
970 void
971 vm_page_insert(
972 vm_page_t mem,
973 vm_object_t object,
974 vm_object_offset_t offset)
975 {
976 vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
977 }
978
979 void
980 vm_page_insert_internal(
981 vm_page_t mem,
982 vm_object_t object,
983 vm_object_offset_t offset,
984 boolean_t queues_lock_held,
985 boolean_t insert_in_hash)
986 {
987 vm_page_bucket_t *bucket;
988 lck_spin_t *bucket_lock;
989 int hash_id;
990
991 XPR(XPR_VM_PAGE,
992 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
993 object, offset, mem, 0,0);
994
995 VM_PAGE_CHECK(mem);
996
997 if (object == vm_submap_object) {
998 /* the vm_submap_object is only a placeholder for submaps */
999 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1000 }
1001
1002 vm_object_lock_assert_exclusive(object);
1003 #if DEBUG
1004 lck_mtx_assert(&vm_page_queue_lock,
1005 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1006 : LCK_MTX_ASSERT_NOTOWNED);
1007 #endif /* DEBUG */
1008
1009 if (insert_in_hash == TRUE) {
1010 #if DEBUG
1011 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1012 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1013 "already in (obj=%p,off=0x%llx)",
1014 mem, object, offset, mem->object, mem->offset);
1015 #endif
1016 assert(!object->internal || offset < object->vo_size);
1017
1018 /* only insert "pageout" pages into "pageout" objects,
1019 * and normal pages into normal objects */
1020 assert(object->pageout == mem->pageout);
1021
1022 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1023
1024 /*
1025 * Record the object/offset pair in this page
1026 */
1027
1028 mem->object = object;
1029 mem->offset = offset;
1030
1031 /*
1032 * Insert it into the object/offset hash table
1033 */
1034 hash_id = vm_page_hash(object, offset);
1035 bucket = &vm_page_buckets[hash_id];
1036 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1037
1038 lck_spin_lock(bucket_lock);
1039
1040 mem->next = bucket->pages;
1041 bucket->pages = mem;
1042 #if MACH_PAGE_HASH_STATS
1043 if (++bucket->cur_count > bucket->hi_count)
1044 bucket->hi_count = bucket->cur_count;
1045 #endif /* MACH_PAGE_HASH_STATS */
1046
1047 lck_spin_unlock(bucket_lock);
1048 }
1049
1050 { unsigned int cache_attr;
1051
1052 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1053
1054 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1055 pmap_set_cache_attributes(mem->phys_page, cache_attr);
1056 object->set_cache_attr = TRUE;
1057 }
1058 }
1059 /*
1060 * Now link into the object's list of backed pages.
1061 */
1062
1063 VM_PAGE_INSERT(mem, object);
1064 mem->tabled = TRUE;
1065
1066 /*
1067 * Show that the object has one more resident page.
1068 */
1069
1070 object->resident_page_count++;
1071 if (VM_PAGE_WIRED(mem)) {
1072 object->wired_page_count++;
1073 }
1074 assert(object->resident_page_count >= object->wired_page_count);
1075
1076 assert(!mem->reusable);
1077
1078 if (object->purgable == VM_PURGABLE_VOLATILE) {
1079 if (VM_PAGE_WIRED(mem)) {
1080 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1081 } else {
1082 OSAddAtomic(1, &vm_page_purgeable_count);
1083 }
1084 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1085 mem->throttled) {
1086 /*
1087 * This page belongs to a purged VM object but hasn't
1088 * been purged (because it was "busy").
1089 * It's in the "throttled" queue and hence not
1090 * visible to vm_pageout_scan(). Move it to a pageable
1091 * queue, so that it can eventually be reclaimed, instead
1092 * of lingering in the "empty" object.
1093 */
1094 if (queues_lock_held == FALSE)
1095 vm_page_lockspin_queues();
1096 vm_page_deactivate(mem);
1097 if (queues_lock_held == FALSE)
1098 vm_page_unlock_queues();
1099 }
1100 }
1101
1102 /*
1103 * vm_page_replace:
1104 *
1105 * Exactly like vm_page_insert, except that we first
1106 * remove any existing page at the given offset in object.
1107 *
1108 * The object must be locked.
1109 */
1110 void
1111 vm_page_replace(
1112 register vm_page_t mem,
1113 register vm_object_t object,
1114 register vm_object_offset_t offset)
1115 {
1116 vm_page_bucket_t *bucket;
1117 vm_page_t found_m = VM_PAGE_NULL;
1118 lck_spin_t *bucket_lock;
1119 int hash_id;
1120
1121 VM_PAGE_CHECK(mem);
1122 vm_object_lock_assert_exclusive(object);
1123 #if DEBUG
1124 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1125 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1126 "already in (obj=%p,off=0x%llx)",
1127 mem, object, offset, mem->object, mem->offset);
1128 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1129 #endif
1130 /*
1131 * Record the object/offset pair in this page
1132 */
1133
1134 mem->object = object;
1135 mem->offset = offset;
1136
1137 /*
1138 * Insert it into the object/offset hash table,
1139 * replacing any page that might have been there.
1140 */
1141
1142 hash_id = vm_page_hash(object, offset);
1143 bucket = &vm_page_buckets[hash_id];
1144 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1145
1146 lck_spin_lock(bucket_lock);
1147
1148 if (bucket->pages) {
1149 vm_page_t *mp = &bucket->pages;
1150 vm_page_t m = *mp;
1151
1152 do {
1153 if (m->object == object && m->offset == offset) {
1154 /*
1155 * Remove old page from hash list
1156 */
1157 *mp = m->next;
1158
1159 found_m = m;
1160 break;
1161 }
1162 mp = &m->next;
1163 } while ((m = *mp));
1164
1165 mem->next = bucket->pages;
1166 } else {
1167 mem->next = VM_PAGE_NULL;
1168 }
1169 /*
1170 * insert new page at head of hash list
1171 */
1172 bucket->pages = mem;
1173
1174 lck_spin_unlock(bucket_lock);
1175
1176 if (found_m) {
1177 /*
1178 * there was already a page at the specified
1179 * offset for this object... remove it from
1180 * the object and free it back to the free list
1181 */
1182 vm_page_free_unlocked(found_m, FALSE);
1183 }
1184 vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
1185 }
1186
1187 /*
1188 * vm_page_remove: [ internal use only ]
1189 *
1190 * Removes the given mem entry from the object/offset-page
1191 * table and the object page list.
1192 *
1193 * The object must be locked.
1194 */
1195
1196 void
1197 vm_page_remove(
1198 vm_page_t mem,
1199 boolean_t remove_from_hash)
1200 {
1201 vm_page_bucket_t *bucket;
1202 vm_page_t this;
1203 lck_spin_t *bucket_lock;
1204 int hash_id;
1205
1206 XPR(XPR_VM_PAGE,
1207 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1208 mem->object, mem->offset,
1209 mem, 0,0);
1210
1211 vm_object_lock_assert_exclusive(mem->object);
1212 assert(mem->tabled);
1213 assert(!mem->cleaning);
1214 VM_PAGE_CHECK(mem);
1215
1216 if (remove_from_hash == TRUE) {
1217 /*
1218 * Remove from the object/offset hash table
1219 */
1220 hash_id = vm_page_hash(mem->object, mem->offset);
1221 bucket = &vm_page_buckets[hash_id];
1222 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1223
1224 lck_spin_lock(bucket_lock);
1225
1226 if ((this = bucket->pages) == mem) {
1227 /* optimize for common case */
1228
1229 bucket->pages = mem->next;
1230 } else {
1231 vm_page_t *prev;
1232
1233 for (prev = &this->next;
1234 (this = *prev) != mem;
1235 prev = &this->next)
1236 continue;
1237 *prev = this->next;
1238 }
1239 #if MACH_PAGE_HASH_STATS
1240 bucket->cur_count--;
1241 #endif /* MACH_PAGE_HASH_STATS */
1242
1243 lck_spin_unlock(bucket_lock);
1244 }
1245 /*
1246 * Now remove from the object's list of backed pages.
1247 */
1248
1249 VM_PAGE_REMOVE(mem);
1250
1251 /*
1252 * And show that the object has one fewer resident
1253 * page.
1254 */
1255
1256 assert(mem->object->resident_page_count > 0);
1257 mem->object->resident_page_count--;
1258
1259 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1260 if (mem->object->resident_page_count == 0)
1261 vm_object_cache_remove(mem->object);
1262 }
1263
1264 if (VM_PAGE_WIRED(mem)) {
1265 assert(mem->object->wired_page_count > 0);
1266 mem->object->wired_page_count--;
1267 }
1268 assert(mem->object->resident_page_count >=
1269 mem->object->wired_page_count);
1270 if (mem->reusable) {
1271 assert(mem->object->reusable_page_count > 0);
1272 mem->object->reusable_page_count--;
1273 assert(mem->object->reusable_page_count <=
1274 mem->object->resident_page_count);
1275 mem->reusable = FALSE;
1276 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1277 vm_page_stats_reusable.reused_remove++;
1278 } else if (mem->object->all_reusable) {
1279 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1280 vm_page_stats_reusable.reused_remove++;
1281 }
1282
1283 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1284 if (VM_PAGE_WIRED(mem)) {
1285 assert(vm_page_purgeable_wired_count > 0);
1286 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1287 } else {
1288 assert(vm_page_purgeable_count > 0);
1289 OSAddAtomic(-1, &vm_page_purgeable_count);
1290 }
1291 }
1292 if (mem->object->set_cache_attr == TRUE)
1293 pmap_set_cache_attributes(mem->phys_page, 0);
1294
1295 mem->tabled = FALSE;
1296 mem->object = VM_OBJECT_NULL;
1297 mem->offset = (vm_object_offset_t) -1;
1298 }
1299
1300
1301 /*
1302 * vm_page_lookup:
1303 *
1304 * Returns the page associated with the object/offset
1305 * pair specified; if none is found, VM_PAGE_NULL is returned.
1306 *
1307 * The object must be locked. No side effects.
1308 */
1309
1310 unsigned long vm_page_lookup_hint = 0;
1311 unsigned long vm_page_lookup_hint_next = 0;
1312 unsigned long vm_page_lookup_hint_prev = 0;
1313 unsigned long vm_page_lookup_hint_miss = 0;
1314 unsigned long vm_page_lookup_bucket_NULL = 0;
1315 unsigned long vm_page_lookup_miss = 0;
1316
1317
1318 vm_page_t
1319 vm_page_lookup(
1320 vm_object_t object,
1321 vm_object_offset_t offset)
1322 {
1323 vm_page_t mem;
1324 vm_page_bucket_t *bucket;
1325 queue_entry_t qe;
1326 lck_spin_t *bucket_lock;
1327 int hash_id;
1328
1329 vm_object_lock_assert_held(object);
1330 mem = object->memq_hint;
1331
1332 if (mem != VM_PAGE_NULL) {
1333 assert(mem->object == object);
1334
1335 if (mem->offset == offset) {
1336 vm_page_lookup_hint++;
1337 return mem;
1338 }
1339 qe = queue_next(&mem->listq);
1340
1341 if (! queue_end(&object->memq, qe)) {
1342 vm_page_t next_page;
1343
1344 next_page = (vm_page_t) qe;
1345 assert(next_page->object == object);
1346
1347 if (next_page->offset == offset) {
1348 vm_page_lookup_hint_next++;
1349 object->memq_hint = next_page; /* new hint */
1350 return next_page;
1351 }
1352 }
1353 qe = queue_prev(&mem->listq);
1354
1355 if (! queue_end(&object->memq, qe)) {
1356 vm_page_t prev_page;
1357
1358 prev_page = (vm_page_t) qe;
1359 assert(prev_page->object == object);
1360
1361 if (prev_page->offset == offset) {
1362 vm_page_lookup_hint_prev++;
1363 object->memq_hint = prev_page; /* new hint */
1364 return prev_page;
1365 }
1366 }
1367 }
1368 /*
1369 * Search the hash table for this object/offset pair
1370 */
1371 hash_id = vm_page_hash(object, offset);
1372 bucket = &vm_page_buckets[hash_id];
1373
1374 /*
1375 * since we hold the object lock, we are guaranteed that no
1376 * new pages can be inserted into this object... this in turn
1377 * guarantees that the page we're looking for can't exist
1378 * if the bucket it hashes to is currently NULL even when looked
1379 * at outside the scope of the hash bucket lock... this is a
1380 * really cheap optimization to avoid taking the lock
1381 */
1382 if (bucket->pages == VM_PAGE_NULL) {
1383 vm_page_lookup_bucket_NULL++;
1384
1385 return (VM_PAGE_NULL);
1386 }
1387 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1388
1389 lck_spin_lock(bucket_lock);
1390
1391 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1392 VM_PAGE_CHECK(mem);
1393 if ((mem->object == object) && (mem->offset == offset))
1394 break;
1395 }
1396 lck_spin_unlock(bucket_lock);
1397
1398 if (mem != VM_PAGE_NULL) {
1399 if (object->memq_hint != VM_PAGE_NULL) {
1400 vm_page_lookup_hint_miss++;
1401 }
1402 assert(mem->object == object);
1403 object->memq_hint = mem;
1404 } else
1405 vm_page_lookup_miss++;
1406
1407 return(mem);
1408 }
1409
1410
1411 /*
1412 * vm_page_rename:
1413 *
1414 * Move the given memory entry from its
1415 * current object to the specified target object/offset.
1416 *
1417 * The object must be locked.
1418 */
1419 void
1420 vm_page_rename(
1421 register vm_page_t mem,
1422 register vm_object_t new_object,
1423 vm_object_offset_t new_offset,
1424 boolean_t encrypted_ok)
1425 {
1426 assert(mem->object != new_object);
1427
1428 /*
1429 * ENCRYPTED SWAP:
1430 * The encryption key is based on the page's memory object
1431 * (aka "pager") and paging offset. Moving the page to
1432 * another VM object changes its "pager" and "paging_offset"
1433 * so it has to be decrypted first, or we would lose the key.
1434 *
1435 * One exception is VM object collapsing, where we transfer pages
1436 * from one backing object to its parent object. This operation also
1437 * transfers the paging information, so the <pager,paging_offset> info
1438 * should remain consistent. The caller (vm_object_do_collapse())
1439 * sets "encrypted_ok" in this case.
1440 */
1441 if (!encrypted_ok && mem->encrypted) {
1442 panic("vm_page_rename: page %p is encrypted\n", mem);
1443 }
1444
1445 XPR(XPR_VM_PAGE,
1446 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1447 new_object, new_offset,
1448 mem, 0,0);
1449
1450 /*
1451 * Changes to mem->object require the page lock because
1452 * the pageout daemon uses that lock to get the object.
1453 */
1454 vm_page_lockspin_queues();
1455
1456 vm_page_remove(mem, TRUE);
1457 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);
1458
1459 vm_page_unlock_queues();
1460 }
1461
1462 /*
1463 * vm_page_init:
1464 *
1465 * Initialize the fields in a new page.
1466 * This takes a structure with random values and initializes it
1467 * so that it can be given to vm_page_release or vm_page_insert.
1468 */
1469 void
1470 vm_page_init(
1471 vm_page_t mem,
1472 ppnum_t phys_page,
1473 boolean_t lopage)
1474 {
1475 assert(phys_page);
1476
1477 #if DEBUG
1478 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1479 if (!(pmap_valid_page(phys_page))) {
1480 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1481 }
1482 }
1483 #endif
1484 *mem = vm_page_template;
1485 mem->phys_page = phys_page;
1486 #if 0
1487 /*
1488 * we're leaving this turned off for now... currently pages
1489 * come off the free list and are either immediately dirtied/referenced
1490 * due to zero-fill or COW faults, or are used to read or write files...
1491 * in the file I/O case, the UPL mechanism takes care of clearing
1492 * the state of the HW ref/mod bits in a somewhat fragile way.
1493 * Since we may change the way this works in the future (to toughen it up),
1494 * I'm leaving this as a reminder of where these bits could get cleared
1495 */
1496
1497 /*
1498 * make sure both the h/w referenced and modified bits are
1499 * clear at this point... we are especially dependent on
1500 * not finding a 'stale' h/w modified in a number of spots
1501 * once this page goes back into use
1502 */
1503 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1504 #endif
1505 mem->lopage = lopage;
1506 }
1507
1508 /*
1509 * vm_page_grab_fictitious:
1510 *
1511 * Remove a fictitious page from the free list.
1512 * Returns VM_PAGE_NULL if there are no free pages.
1513 */
1514 int c_vm_page_grab_fictitious = 0;
1515 int c_vm_page_grab_fictitious_failed = 0;
1516 int c_vm_page_release_fictitious = 0;
1517 int c_vm_page_more_fictitious = 0;
1518
1519 vm_page_t
1520 vm_page_grab_fictitious_common(
1521 ppnum_t phys_addr)
1522 {
1523 vm_page_t m;
1524
1525 if ((m = (vm_page_t)zget(vm_page_zone))) {
1526
1527 vm_page_init(m, phys_addr, FALSE);
1528 m->fictitious = TRUE;
1529
1530 c_vm_page_grab_fictitious++;
1531 } else
1532 c_vm_page_grab_fictitious_failed++;
1533
1534 return m;
1535 }
1536
1537 vm_page_t
1538 vm_page_grab_fictitious(void)
1539 {
1540 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1541 }
1542
1543 vm_page_t
1544 vm_page_grab_guard(void)
1545 {
1546 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1547 }
1548
1549
1550 /*
1551 * vm_page_release_fictitious:
1552 *
1553 * Release a fictitious page to the zone pool
1554 */
1555 void
1556 vm_page_release_fictitious(
1557 vm_page_t m)
1558 {
1559 assert(!m->free);
1560 assert(m->fictitious);
1561 assert(m->phys_page == vm_page_fictitious_addr ||
1562 m->phys_page == vm_page_guard_addr);
1563
1564 c_vm_page_release_fictitious++;
1565
1566 zfree(vm_page_zone, m);
1567 }
1568
1569 /*
1570 * vm_page_more_fictitious:
1571 *
1572 * Add more fictitious pages to the zone.
1573 * Allowed to block. This routine is way intimate
1574 * with the zones code, for several reasons:
1575 * 1. we need to carve some page structures out of physical
1576 * memory before zones work, so they _cannot_ come from
1577 * the zone_map.
1578 * 2. the zone needs to be collectable in order to prevent
1579 * growth without bound. These structures are used by
1580 * the device pager (by the hundreds and thousands), as
1581 * private pages for pageout, and as blocking pages for
1582 * pagein. Temporary bursts in demand should not result in
1583 * permanent allocation of a resource.
1584 * 3. To smooth allocation humps, we allocate single pages
1585 * with kernel_memory_allocate(), and cram them into the
1586 * zone.
1587 */
1588
1589 void vm_page_more_fictitious(void)
1590 {
1591 vm_offset_t addr;
1592 kern_return_t retval;
1593
1594 c_vm_page_more_fictitious++;
1595
1596 /*
1597 * Allocate a single page from the zone_map. Do not wait if no physical
1598 * pages are immediately available, and do not zero the space. We need
1599 * our own blocking lock here to prevent having multiple,
1600 * simultaneous requests from piling up on the zone_map lock. Exactly
1601 * one (of our) threads should be potentially waiting on the map lock.
1602 * If winner is not vm-privileged, then the page allocation will fail,
1603 * and it will temporarily block here in the vm_page_wait().
1604 */
1605 lck_mtx_lock(&vm_page_alloc_lock);
1606 /*
1607 * If another thread allocated space, just bail out now.
1608 */
1609 if (zone_free_count(vm_page_zone) > 5) {
1610 /*
1611 * The number "5" is a small number that is larger than the
1612 * number of fictitious pages that any single caller will
1613 * attempt to allocate. Otherwise, a thread will attempt to
1614 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1615 * release all of the resources and locks already acquired,
1616 * and then call this routine. This routine finds the pages
1617 * that the caller released, so fails to allocate new space.
1618 * The process repeats infinitely. The largest known number
1619 * of fictitious pages required in this manner is 2. 5 is
1620 * simply a somewhat larger number.
1621 */
1622 lck_mtx_unlock(&vm_page_alloc_lock);
1623 return;
1624 }
1625
1626 retval = kernel_memory_allocate(zone_map,
1627 &addr, PAGE_SIZE, VM_PROT_ALL,
1628 KMA_KOBJECT|KMA_NOPAGEWAIT);
1629 if (retval != KERN_SUCCESS) {
1630 /*
1631 * No page was available. Drop the
1632 * lock to give another thread a chance at it, and
1633 * wait for the pageout daemon to make progress.
1634 */
1635 lck_mtx_unlock(&vm_page_alloc_lock);
1636 vm_page_wait(THREAD_UNINT);
1637 return;
1638 }
1639 zcram(vm_page_zone, addr, PAGE_SIZE);
1640
1641 lck_mtx_unlock(&vm_page_alloc_lock);
1642 }
1643
1644
1645 /*
1646 * vm_pool_low():
1647 *
1648 * Return true if it is not likely that a non-vm_privileged thread
1649 * can get memory without blocking. Advisory only, since the
1650 * situation may change under us.
1651 */
1652 int
1653 vm_pool_low(void)
1654 {
1655 /* No locking, at worst we will fib. */
1656 return( vm_page_free_count <= vm_page_free_reserved );
1657 }
1658
1659
1660
1661 /*
1662 * this is an interface to support bring-up of drivers
1663 * on platforms with physical memory > 4G...
1664 */
1665 int vm_himemory_mode = 0;
1666
1667
1668 /*
1669 * this interface exists to support hardware controllers
1670 * incapable of generating DMAs with more than 32 bits
1671 * of address on platforms with physical memory > 4G...
1672 */
1673 unsigned int vm_lopages_allocated_q = 0;
1674 unsigned int vm_lopages_allocated_cpm_success = 0;
1675 unsigned int vm_lopages_allocated_cpm_failed = 0;
1676 queue_head_t vm_lopage_queue_free;
1677
1678 vm_page_t
1679 vm_page_grablo(void)
1680 {
1681 vm_page_t mem;
1682
1683 if (vm_lopage_needed == FALSE)
1684 return (vm_page_grab());
1685
1686 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1687
1688 if ( !queue_empty(&vm_lopage_queue_free)) {
1689 queue_remove_first(&vm_lopage_queue_free,
1690 mem,
1691 vm_page_t,
1692 pageq);
1693 assert(vm_lopage_free_count);
1694
1695 vm_lopage_free_count--;
1696 vm_lopages_allocated_q++;
1697
1698 if (vm_lopage_free_count < vm_lopage_lowater)
1699 vm_lopage_refill = TRUE;
1700
1701 lck_mtx_unlock(&vm_page_queue_free_lock);
1702 } else {
1703 lck_mtx_unlock(&vm_page_queue_free_lock);
1704
1705 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1706
1707 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1708 vm_lopages_allocated_cpm_failed++;
1709 lck_mtx_unlock(&vm_page_queue_free_lock);
1710
1711 return (VM_PAGE_NULL);
1712 }
1713 mem->busy = TRUE;
1714
1715 vm_page_lockspin_queues();
1716
1717 mem->gobbled = FALSE;
1718 vm_page_gobble_count--;
1719 vm_page_wire_count--;
1720
1721 vm_lopages_allocated_cpm_success++;
1722 vm_page_unlock_queues();
1723 }
1724 assert(mem->busy);
1725 assert(!mem->free);
1726 assert(!mem->pmapped);
1727 assert(!mem->wpmapped);
1728 assert(!pmap_is_noencrypt(mem->phys_page));
1729
1730 mem->pageq.next = NULL;
1731 mem->pageq.prev = NULL;
1732
1733 return (mem);
1734 }
1735
1736
1737 /*
1738 * vm_page_grab:
1739 *
1740 * first try to grab a page from the per-cpu free list...
1741 * this must be done while pre-emption is disabled... if
1742 * a page is available, we're done...
1743 * if no page is available, grab the vm_page_queue_free_lock
1744 * and see if current number of free pages would allow us
1745 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1746 * if there are pages available, disable preemption and
1747 * recheck the state of the per-cpu free list... we could
1748 * have been preempted and moved to a different cpu, or
1749 * some other thread could have re-filled it... if still
1750 * empty, figure out how many pages we can steal from the
1751 * global free queue and move to the per-cpu queue...
1752 * return 1 of these pages when done... only wakeup the
1753 * pageout_scan thread if we moved pages from the global
1754 * list... no need for the wakeup if we've satisfied the
1755 * request from the per-cpu queue.
1756 */
1757
1758 #define COLOR_GROUPS_TO_STEAL 4
1759
1760
1761 vm_page_t
1762 vm_page_grab( void )
1763 {
1764 vm_page_t mem;
1765
1766
1767 disable_preemption();
1768
1769 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1770 return_page_from_cpu_list:
1771 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1772 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1773 mem->pageq.next = NULL;
1774
1775 enable_preemption();
1776
1777 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1778 assert(mem->tabled == FALSE);
1779 assert(mem->object == VM_OBJECT_NULL);
1780 assert(!mem->laundry);
1781 assert(!mem->free);
1782 assert(pmap_verify_free(mem->phys_page));
1783 assert(mem->busy);
1784 assert(!mem->encrypted);
1785 assert(!mem->pmapped);
1786 assert(!mem->wpmapped);
1787 assert(!mem->active);
1788 assert(!mem->inactive);
1789 assert(!mem->throttled);
1790 assert(!mem->speculative);
1791 assert(!pmap_is_noencrypt(mem->phys_page));
1792
1793 return mem;
1794 }
1795 enable_preemption();
1796
1797
1798 /*
1799 * Optionally produce warnings if the wire or gobble
1800 * counts exceed some threshold.
1801 */
1802 if (vm_page_wire_count_warning > 0
1803 && vm_page_wire_count >= vm_page_wire_count_warning) {
1804 printf("mk: vm_page_grab(): high wired page count of %d\n",
1805 vm_page_wire_count);
1806 assert(vm_page_wire_count < vm_page_wire_count_warning);
1807 }
1808 if (vm_page_gobble_count_warning > 0
1809 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1810 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1811 vm_page_gobble_count);
1812 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1813 }
1814
1815 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1816
1817 /*
1818 * Only let privileged threads (involved in pageout)
1819 * dip into the reserved pool.
1820 */
1821 if ((vm_page_free_count < vm_page_free_reserved) &&
1822 !(current_thread()->options & TH_OPT_VMPRIV)) {
1823 lck_mtx_unlock(&vm_page_queue_free_lock);
1824 mem = VM_PAGE_NULL;
1825 }
1826 else {
1827 vm_page_t head;
1828 vm_page_t tail;
1829 unsigned int pages_to_steal;
1830 unsigned int color;
1831
1832 while ( vm_page_free_count == 0 ) {
1833
1834 lck_mtx_unlock(&vm_page_queue_free_lock);
1835 /*
1836 * must be a privileged thread to be
1837 * in this state since a non-privileged
1838 * thread would have bailed if we were
1839 * under the vm_page_free_reserved mark
1840 */
1841 VM_PAGE_WAIT();
1842 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1843 }
1844
1845 disable_preemption();
1846
1847 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1848 lck_mtx_unlock(&vm_page_queue_free_lock);
1849
1850 /*
1851 * we got preempted and moved to another processor
1852 * or we got preempted and someone else ran and filled the cache
1853 */
1854 goto return_page_from_cpu_list;
1855 }
1856 if (vm_page_free_count <= vm_page_free_reserved)
1857 pages_to_steal = 1;
1858 else {
1859 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1860
1861 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1862 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1863 }
1864 color = PROCESSOR_DATA(current_processor(), start_color);
1865 head = tail = NULL;
1866
1867 while (pages_to_steal--) {
1868 if (--vm_page_free_count < vm_page_free_count_minimum)
1869 vm_page_free_count_minimum = vm_page_free_count;
1870
1871 while (queue_empty(&vm_page_queue_free[color]))
1872 color = (color + 1) & vm_color_mask;
1873
1874 queue_remove_first(&vm_page_queue_free[color],
1875 mem,
1876 vm_page_t,
1877 pageq);
1878 mem->pageq.next = NULL;
1879 mem->pageq.prev = NULL;
1880
1881 assert(!mem->active);
1882 assert(!mem->inactive);
1883 assert(!mem->throttled);
1884 assert(!mem->speculative);
1885
1886 color = (color + 1) & vm_color_mask;
1887
1888 if (head == NULL)
1889 head = mem;
1890 else
1891 tail->pageq.next = (queue_t)mem;
1892 tail = mem;
1893
1894 mem->pageq.prev = NULL;
1895 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1896 assert(mem->tabled == FALSE);
1897 assert(mem->object == VM_OBJECT_NULL);
1898 assert(!mem->laundry);
1899 assert(mem->free);
1900 mem->free = FALSE;
1901
1902 assert(pmap_verify_free(mem->phys_page));
1903 assert(mem->busy);
1904 assert(!mem->free);
1905 assert(!mem->encrypted);
1906 assert(!mem->pmapped);
1907 assert(!mem->wpmapped);
1908 assert(!pmap_is_noencrypt(mem->phys_page));
1909 }
1910 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1911 PROCESSOR_DATA(current_processor(), start_color) = color;
1912
1913 /*
1914 * satisfy this request
1915 */
1916 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1917 mem = head;
1918 mem->pageq.next = NULL;
1919
1920 lck_mtx_unlock(&vm_page_queue_free_lock);
1921
1922 enable_preemption();
1923 }
1924 /*
1925 * Decide if we should poke the pageout daemon.
1926 * We do this if the free count is less than the low
1927 * water mark, or if the free count is less than the high
1928 * water mark (but above the low water mark) and the inactive
1929 * count is less than its target.
1930 *
1931 * We don't have the counts locked ... if they change a little,
1932 * it doesn't really matter.
1933 */
1934 if ((vm_page_free_count < vm_page_free_min) ||
1935 ((vm_page_free_count < vm_page_free_target) &&
1936 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1937 thread_wakeup((event_t) &vm_page_free_wanted);
1938
1939 VM_CHECK_MEMORYSTATUS;
1940
1941 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1942
1943 return mem;
1944 }
1945
1946 /*
1947 * vm_page_release:
1948 *
1949 * Return a page to the free list.
1950 */
1951
1952 void
1953 vm_page_release(
1954 register vm_page_t mem)
1955 {
1956 unsigned int color;
1957 int need_wakeup = 0;
1958 int need_priv_wakeup = 0;
1959
1960
1961 assert(!mem->private && !mem->fictitious);
1962 if (vm_page_free_verify) {
1963 assert(pmap_verify_free(mem->phys_page));
1964 }
1965 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1966
1967
1968 pmap_clear_noencrypt(mem->phys_page);
1969
1970 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1971 #if DEBUG
1972 if (mem->free)
1973 panic("vm_page_release");
1974 #endif
1975
1976 assert(mem->busy);
1977 assert(!mem->laundry);
1978 assert(mem->object == VM_OBJECT_NULL);
1979 assert(mem->pageq.next == NULL &&
1980 mem->pageq.prev == NULL);
1981 assert(mem->listq.next == NULL &&
1982 mem->listq.prev == NULL);
1983
1984 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
1985 vm_lopage_free_count < vm_lopage_free_limit &&
1986 mem->phys_page < max_valid_low_ppnum) {
1987 /*
1988 * this exists to support hardware controllers
1989 * incapable of generating DMAs with more than 32 bits
1990 * of address on platforms with physical memory > 4G...
1991 */
1992 queue_enter_first(&vm_lopage_queue_free,
1993 mem,
1994 vm_page_t,
1995 pageq);
1996 vm_lopage_free_count++;
1997
1998 if (vm_lopage_free_count >= vm_lopage_free_limit)
1999 vm_lopage_refill = FALSE;
2000
2001 mem->lopage = TRUE;
2002 } else {
2003 mem->lopage = FALSE;
2004 mem->free = TRUE;
2005
2006 color = mem->phys_page & vm_color_mask;
2007 queue_enter_first(&vm_page_queue_free[color],
2008 mem,
2009 vm_page_t,
2010 pageq);
2011 vm_page_free_count++;
2012 /*
2013 * Check if we should wake up someone waiting for a page.
2014 * But don't bother waking them unless they can allocate.
2015 *
2016 * We wake up only one thread, to prevent starvation.
2017 * Because the scheduling system handles wait queues FIFO,
2018 * if we wake up all waiting threads, one greedy thread
2019 * can starve multiple well-behaved threads. When the threads
2020 * all wake up, the greedy thread runs first, grabs the page,
2021 * and waits for another page. It will be the first to run
2022 * when the next page is freed.
2023 *
2024 * However, there is a slight danger here.
2025 * The thread we wake might not use the free page.
2026 * Then the other threads could wait indefinitely
2027 * while the page goes unused. To forestall this,
2028 * the pageout daemon will keep making free pages
2029 * as long as vm_page_free_wanted is non-zero.
2030 */
2031
2032 assert(vm_page_free_count > 0);
2033 if (vm_page_free_wanted_privileged > 0) {
2034 vm_page_free_wanted_privileged--;
2035 need_priv_wakeup = 1;
2036 } else if (vm_page_free_wanted > 0 &&
2037 vm_page_free_count > vm_page_free_reserved) {
2038 vm_page_free_wanted--;
2039 need_wakeup = 1;
2040 }
2041 }
2042 lck_mtx_unlock(&vm_page_queue_free_lock);
2043
2044 if (need_priv_wakeup)
2045 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2046 else if (need_wakeup)
2047 thread_wakeup_one((event_t) &vm_page_free_count);
2048
2049 VM_CHECK_MEMORYSTATUS;
2050 }
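/*
 * Editor's note (illustrative, not part of the original source): the free
 * queues above are indexed by "color", computed as
 * mem->phys_page & vm_color_mask.  Assuming, for example, vm_colors == 8
 * (so vm_color_mask == 7), physical page numbers 0x1000 through 0x1007
 * would land on colors 0 through 7, spreading physically adjacent pages
 * across different free queues.
 */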
2051
2052 /*
2053 * vm_page_wait:
2054 *
2055 * Wait for a page to become available.
2056 * If there are plenty of free pages, then we don't sleep.
2057 *
2058 * Returns:
2059 * TRUE: There may be another page, try again
2060 * FALSE: We were interrupted out of our wait, don't try again
2061 */
2062
2063 boolean_t
2064 vm_page_wait(
2065 int interruptible )
2066 {
2067 /*
2068 * We can't use vm_page_free_reserved to make this
2069 * determination. Consider: some thread might
2070 * need to allocate two pages. The first allocation
2071 * succeeds, the second fails. After the first page is freed,
2072 * a call to vm_page_wait must really block.
2073 */
2074 kern_return_t wait_result;
2075 int need_wakeup = 0;
2076 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2077
2078 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2079
2080 if (is_privileged && vm_page_free_count) {
2081 lck_mtx_unlock(&vm_page_queue_free_lock);
2082 return TRUE;
2083 }
2084 if (vm_page_free_count < vm_page_free_target) {
2085
2086 if (is_privileged) {
2087 if (vm_page_free_wanted_privileged++ == 0)
2088 need_wakeup = 1;
2089 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2090 } else {
2091 if (vm_page_free_wanted++ == 0)
2092 need_wakeup = 1;
2093 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2094 }
2095 lck_mtx_unlock(&vm_page_queue_free_lock);
2096 counter(c_vm_page_wait_block++);
2097
2098 if (need_wakeup)
2099 thread_wakeup((event_t)&vm_page_free_wanted);
2100
2101 if (wait_result == THREAD_WAITING)
2102 wait_result = thread_block(THREAD_CONTINUE_NULL);
2103
2104 return(wait_result == THREAD_AWAKENED);
2105 } else {
2106 lck_mtx_unlock(&vm_page_queue_free_lock);
2107 return TRUE;
2108 }
2109 }
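/*
 * Usage sketch (editor's illustration, not part of the original source):
 * vm_page_grab() and vm_page_wait() are typically paired in a retry loop,
 * as vm_page_part_zero_fill() does later in this file:
 *
 *	vm_page_t m;
 *
 *	while (1) {
 *		m = vm_page_grab();
 *		if (m == VM_PAGE_NULL) {
 *			vm_page_wait(THREAD_UNINT);
 *			continue;
 *		}
 *		break;
 *	}
 */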
2110
2111 /*
2112 * vm_page_alloc:
2113 *
2114 * Allocate and return a memory cell associated
2115 * with this VM object/offset pair.
2116 *
2117 * Object must be locked.
2118 */
2119
2120 vm_page_t
2121 vm_page_alloc(
2122 vm_object_t object,
2123 vm_object_offset_t offset)
2124 {
2125 register vm_page_t mem;
2126
2127 vm_object_lock_assert_exclusive(object);
2128 mem = vm_page_grab();
2129 if (mem == VM_PAGE_NULL)
2130 return VM_PAGE_NULL;
2131
2132 vm_page_insert(mem, object, offset);
2133
2134 return(mem);
2135 }
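/*
 * Usage sketch (editor's illustration, not part of the original source;
 * "object", "offset" and "m" are placeholder variables): the caller holds
 * the object lock exclusively and handles failure itself, for example by
 * waiting for a free page and retrying:
 *
 *	vm_object_lock(object);
 *	m = vm_page_alloc(object, offset);
 *	if (m == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		VM_PAGE_WAIT();
 *		vm_object_lock(object);
 *		... retry the lookup/allocation ...
 *	}
 */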
2136
2137 vm_page_t
2138 vm_page_alloclo(
2139 vm_object_t object,
2140 vm_object_offset_t offset)
2141 {
2142 register vm_page_t mem;
2143
2144 vm_object_lock_assert_exclusive(object);
2145 mem = vm_page_grablo();
2146 if (mem == VM_PAGE_NULL)
2147 return VM_PAGE_NULL;
2148
2149 vm_page_insert(mem, object, offset);
2150
2151 return(mem);
2152 }
2153
2154
2155 /*
2156 * vm_page_alloc_guard:
2157 *
2158 * Allocate a fictitious page which will be used
2159 * as a guard page. The page will be inserted into
2160 * the object and returned to the caller.
2161 */
2162
2163 vm_page_t
2164 vm_page_alloc_guard(
2165 vm_object_t object,
2166 vm_object_offset_t offset)
2167 {
2168 register vm_page_t mem;
2169
2170 vm_object_lock_assert_exclusive(object);
2171 mem = vm_page_grab_guard();
2172 if (mem == VM_PAGE_NULL)
2173 return VM_PAGE_NULL;
2174
2175 vm_page_insert(mem, object, offset);
2176
2177 return(mem);
2178 }
2179
2180
2181 counter(unsigned int c_laundry_pages_freed = 0;)
2182
2183 /*
2184 * vm_page_free_prepare:
2185 *
2186 * Removes page from any queue it may be on
2187 * and disassociates it from its VM object.
2188 *
2189 * Object and page queues must be locked prior to entry.
2190 */
2191 static void
2192 vm_page_free_prepare(
2193 vm_page_t mem)
2194 {
2195 vm_page_free_prepare_queues(mem);
2196 vm_page_free_prepare_object(mem, TRUE);
2197 }
2198
2199
2200 void
2201 vm_page_free_prepare_queues(
2202 vm_page_t mem)
2203 {
2204 VM_PAGE_CHECK(mem);
2205 assert(!mem->free);
2206 assert(!mem->cleaning);
2207 assert(!mem->pageout);
2208 #if DEBUG
2209 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2210 if (mem->free)
2211 panic("vm_page_free: freeing page on free list\n");
2212 #endif
2213 if (mem->object) {
2214 vm_object_lock_assert_exclusive(mem->object);
2215 }
2216
2217 if (mem->laundry) {
2218 /*
2219 * We may have to free a page while it's being laundered
2220 * if we lost its pager (due to a forced unmount, for example).
2221 * We need to call vm_pageout_throttle_up() before removing
2222 * the page from its VM object, so that we can find out on
2223 * which pageout queue the page is on.
2224 */
2225 vm_pageout_throttle_up(mem);
2226 counter(++c_laundry_pages_freed);
2227 }
2228 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2229
2230 if (VM_PAGE_WIRED(mem)) {
2231 if (mem->object) {
2232 assert(mem->object->wired_page_count > 0);
2233 mem->object->wired_page_count--;
2234 assert(mem->object->resident_page_count >=
2235 mem->object->wired_page_count);
2236
2237 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2238 OSAddAtomic(+1, &vm_page_purgeable_count);
2239 assert(vm_page_purgeable_wired_count > 0);
2240 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2241 }
2242 }
2243 if (!mem->private && !mem->fictitious)
2244 vm_page_wire_count--;
2245 mem->wire_count = 0;
2246 assert(!mem->gobbled);
2247 } else if (mem->gobbled) {
2248 if (!mem->private && !mem->fictitious)
2249 vm_page_wire_count--;
2250 vm_page_gobble_count--;
2251 }
2252 }
2253
2254
2255 void
2256 vm_page_free_prepare_object(
2257 vm_page_t mem,
2258 boolean_t remove_from_hash)
2259 {
2260 if (mem->tabled)
2261 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2262
2263 PAGE_WAKEUP(mem); /* clears wanted */
2264
2265 if (mem->private) {
2266 mem->private = FALSE;
2267 mem->fictitious = TRUE;
2268 mem->phys_page = vm_page_fictitious_addr;
2269 }
2270 if ( !mem->fictitious) {
2271 if (mem->zero_fill == TRUE)
2272 VM_ZF_COUNT_DECR();
2273 vm_page_init(mem, mem->phys_page, mem->lopage);
2274 }
2275 }
2276
2277
2278 /*
2279 * vm_page_free:
2280 *
2281 * Returns the given page to the free list,
2282 * disassociating it from any VM object.
2283 *
2284 * Object and page queues must be locked prior to entry.
2285 */
2286 void
2287 vm_page_free(
2288 vm_page_t mem)
2289 {
2290 vm_page_free_prepare(mem);
2291
2292 if (mem->fictitious) {
2293 vm_page_release_fictitious(mem);
2294 } else {
2295 vm_page_release(mem);
2296 }
2297 }
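/*
 * Usage sketch (editor's illustration, not part of the original source):
 * vm_page_free() assumes both locks are already held; the
 * vm_page_free_unlocked() variant below takes the page queues lock
 * itself, but the object lock remains the caller's responsibility:
 *
 *	vm_object_lock(object);
 *	vm_page_lock_queues();
 *	vm_page_free(m);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */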
2298
2299
2300 void
2301 vm_page_free_unlocked(
2302 vm_page_t mem,
2303 boolean_t remove_from_hash)
2304 {
2305 vm_page_lockspin_queues();
2306 vm_page_free_prepare_queues(mem);
2307 vm_page_unlock_queues();
2308
2309 vm_page_free_prepare_object(mem, remove_from_hash);
2310
2311 if (mem->fictitious) {
2312 vm_page_release_fictitious(mem);
2313 } else {
2314 vm_page_release(mem);
2315 }
2316 }
2317
2318 /*
2319 * Free a list of pages. The list can be up to several hundred pages,
2320 * as blocked up by vm_pageout_scan().
2321 * The big win is not having to take the free list lock once
2322 * per page. We sort the incoming pages into n lists, one for
2323 * each color.
2324 */
2325 void
2326 vm_page_free_list(
2327 vm_page_t mem,
2328 boolean_t prepare_object)
2329 {
2330 vm_page_t nxt;
2331 int pg_count = 0;
2332 int color;
2333 int inuse_list_head = -1;
2334
2335 queue_head_t free_list[MAX_COLORS];
2336 int inuse[MAX_COLORS];
2337
2338 for (color = 0; color < (signed) vm_colors; color++) {
2339 queue_init(&free_list[color]);
2340 }
2341
2342 while (mem) {
2343 assert(!mem->inactive);
2344 assert(!mem->active);
2345 assert(!mem->throttled);
2346 assert(!mem->free);
2347 assert(!mem->speculative);
2348 assert(!VM_PAGE_WIRED(mem));
2349 assert(mem->pageq.prev == NULL);
2350
2351 nxt = (vm_page_t)(mem->pageq.next);
2352
2353 if (prepare_object == TRUE)
2354 vm_page_free_prepare_object(mem, TRUE);
2355
2356 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2357 assert(pmap_verify_free(mem->phys_page));
2358 }
2359
2360 if (!mem->fictitious) {
2361 assert(mem->busy);
2362 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2363 vm_lopage_free_count < vm_lopage_free_limit &&
2364 mem->phys_page < max_valid_low_ppnum) {
2365 mem->pageq.next = NULL;
2366 vm_page_release(mem);
2367 } else {
2368
2369 /*
2370 * IMPORTANT: we can't set the page "free" here
2371 * because that would make the page eligible for
2372 * a physically-contiguous allocation (see
2373 * vm_page_find_contiguous()) right away (we don't
2374 * hold the vm_page_queue_free lock). That would
2375 * cause trouble because the page is not actually
2376 * in the free queue yet...
2377 */
2378 color = mem->phys_page & vm_color_mask;
2379 if (queue_empty(&free_list[color])) {
2380 inuse[color] = inuse_list_head;
2381 inuse_list_head = color;
2382 }
2383 queue_enter_first(&free_list[color],
2384 mem,
2385 vm_page_t,
2386 pageq);
2387 pg_count++;
2388
2389 pmap_clear_noencrypt(mem->phys_page);
2390 }
2391 } else {
2392 assert(mem->phys_page == vm_page_fictitious_addr ||
2393 mem->phys_page == vm_page_guard_addr);
2394 vm_page_release_fictitious(mem);
2395 }
2396 mem = nxt;
2397 }
2398 if (pg_count) {
2399 unsigned int avail_free_count;
2400 unsigned int need_wakeup = 0;
2401 unsigned int need_priv_wakeup = 0;
2402
2403 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2404
2405 color = inuse_list_head;
2406
2407 while( color != -1 ) {
2408 vm_page_t first, last;
2409 vm_page_t first_free;
2410
2411 /*
2412 * Now that we hold the vm_page_queue_free lock,
2413 * it's safe to mark all pages in our local queue
2414 * as "free"...
2415 */
2416 queue_iterate(&free_list[color],
2417 mem,
2418 vm_page_t,
2419 pageq) {
2420 assert(!mem->free);
2421 assert(mem->busy);
2422 mem->free = TRUE;
2423 }
2424
2425 /*
2426 * ... and insert our local queue at the head of
2427 * the global free queue.
2428 */
2429 first = (vm_page_t) queue_first(&free_list[color]);
2430 last = (vm_page_t) queue_last(&free_list[color]);
2431 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2432 if (queue_empty(&vm_page_queue_free[color])) {
2433 queue_last(&vm_page_queue_free[color]) =
2434 (queue_entry_t) last;
2435 } else {
2436 queue_prev(&first_free->pageq) =
2437 (queue_entry_t) last;
2438 }
2439 queue_first(&vm_page_queue_free[color]) =
2440 (queue_entry_t) first;
2441 queue_prev(&first->pageq) =
2442 (queue_entry_t) &vm_page_queue_free[color];
2443 queue_next(&last->pageq) =
2444 (queue_entry_t) first_free;
2445
2446 /* next color */
2447 color = inuse[color];
2448 }
2449
2450 vm_page_free_count += pg_count;
2451 avail_free_count = vm_page_free_count;
2452
2453 if (vm_page_free_wanted_privileged > 0 &&
2454 avail_free_count > 0) {
2455 if (avail_free_count < vm_page_free_wanted_privileged) {
2456 need_priv_wakeup = avail_free_count;
2457 vm_page_free_wanted_privileged -=
2458 avail_free_count;
2459 avail_free_count = 0;
2460 } else {
2461 need_priv_wakeup = vm_page_free_wanted_privileged;
2462 avail_free_count -=
2463 vm_page_free_wanted_privileged;
2464 vm_page_free_wanted_privileged = 0;
2465 }
2466 }
2467
2468 if (vm_page_free_wanted > 0 &&
2469 avail_free_count > vm_page_free_reserved) {
2470 unsigned int available_pages;
2471
2472 available_pages = (avail_free_count -
2473 vm_page_free_reserved);
2474
2475 if (available_pages >= vm_page_free_wanted) {
2476 need_wakeup = vm_page_free_wanted;
2477 vm_page_free_wanted = 0;
2478 } else {
2479 need_wakeup = available_pages;
2480 vm_page_free_wanted -= available_pages;
2481 }
2482 }
2483 lck_mtx_unlock(&vm_page_queue_free_lock);
2484
2485 if (need_priv_wakeup != 0) {
2486 /*
2487 * There shouldn't be that many VM-privileged threads,
2488 * so let's wake them all up, even if we don't quite
2489 * have enough pages to satisfy them all.
2490 */
2491 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2492 }
2493 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2494 /*
2495 * We don't expect to have any more waiters
2496 * after this, so let's wake them all up at
2497 * once.
2498 */
2499 thread_wakeup((event_t) &vm_page_free_count);
2500 } else for (; need_wakeup != 0; need_wakeup--) {
2501 /*
2502 * Wake up one waiter per page we just released.
2503 */
2504 thread_wakeup_one((event_t) &vm_page_free_count);
2505 }
2506
2507 VM_CHECK_MEMORYSTATUS;
2508 }
2509 }
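/*
 * Usage sketch (editor's illustration, not part of the original source):
 * a caller such as vm_pageout_scan() batches pages by chaining them
 * through pageq.next (leaving pageq.prev NULL, as the asserts above
 * require) and then frees the whole chain in one call:
 *
 *	vm_page_t local_freeq = VM_PAGE_NULL;
 *
 *	m->pageq.next = (queue_entry_t) local_freeq;
 *	local_freeq = m;
 *	...
 *	vm_page_free_list(local_freeq, TRUE);
 */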
2510
2511
2512 /*
2513 * vm_page_wire:
2514 *
2515 * Mark this page as wired down by yet
2516 * another map, removing it from paging queues
2517 * as necessary.
2518 *
2519 * The page's object and the page queues must be locked.
2520 */
2521 void
2522 vm_page_wire(
2523 register vm_page_t mem)
2524 {
2525
2526 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2527
2528 VM_PAGE_CHECK(mem);
2529 if (mem->object) {
2530 vm_object_lock_assert_exclusive(mem->object);
2531 } else {
2532 /*
2533 * In theory, the page should be in an object before it
2534 * gets wired, since we need to hold the object lock
2535 * to update some fields in the page structure.
2536 * However, some code (i386 pmap, for example) might want
2537 * to wire a page before it gets inserted into an object.
2538 * That's somewhat OK, as long as nobody else can get to
2539 * that page and update it at the same time.
2540 */
2541 }
2542 #if DEBUG
2543 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2544 #endif
2545 if ( !VM_PAGE_WIRED(mem)) {
2546 VM_PAGE_QUEUES_REMOVE(mem);
2547
2548 if (mem->object) {
2549 mem->object->wired_page_count++;
2550 assert(mem->object->resident_page_count >=
2551 mem->object->wired_page_count);
2552 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2553 assert(vm_page_purgeable_count > 0);
2554 OSAddAtomic(-1, &vm_page_purgeable_count);
2555 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2556 }
2557 if (mem->object->all_reusable) {
2558 /*
2559 * Wired pages are not counted as "re-usable"
2560 * in "all_reusable" VM objects, so nothing
2561 * to do here.
2562 */
2563 } else if (mem->reusable) {
2564 /*
2565 * This page is not "re-usable" when it's
2566 * wired, so adjust its state and the
2567 * accounting.
2568 */
2569 vm_object_reuse_pages(mem->object,
2570 mem->offset,
2571 mem->offset+PAGE_SIZE_64,
2572 FALSE);
2573 }
2574 }
2575 assert(!mem->reusable);
2576
2577 if (!mem->private && !mem->fictitious && !mem->gobbled)
2578 vm_page_wire_count++;
2579 if (mem->gobbled)
2580 vm_page_gobble_count--;
2581 mem->gobbled = FALSE;
2582 if (mem->zero_fill == TRUE) {
2583 mem->zero_fill = FALSE;
2584 VM_ZF_COUNT_DECR();
2585 }
2586
2587 VM_CHECK_MEMORYSTATUS;
2588
2589 /*
2590 * ENCRYPTED SWAP:
2591 * The page could be encrypted, but
2592 * we don't have to decrypt it here
2593 * because we don't guarantee that the
2594 * data is actually valid at this point.
2595 * The page will get decrypted in
2596 * vm_fault_wire() if needed.
2597 */
2598 }
2599 assert(!mem->gobbled);
2600 mem->wire_count++;
2601 VM_PAGE_CHECK(mem);
2602 }
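/*
 * Usage sketch (editor's illustration, not part of the original source):
 * wiring is done with the page's object locked exclusively and the page
 * queues locked:
 *
 *	vm_object_lock(object);
 *	vm_page_lockspin_queues();
 *	vm_page_wire(m);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */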
2603
2604 /*
2605 * vm_page_gobble:
2606 *
2607 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2608 *
2609 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2610 */
2611 void
2612 vm_page_gobble(
2613 register vm_page_t mem)
2614 {
2615 vm_page_lockspin_queues();
2616 VM_PAGE_CHECK(mem);
2617
2618 assert(!mem->gobbled);
2619 assert( !VM_PAGE_WIRED(mem));
2620
2621 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2622 if (!mem->private && !mem->fictitious)
2623 vm_page_wire_count++;
2624 }
2625 vm_page_gobble_count++;
2626 mem->gobbled = TRUE;
2627 vm_page_unlock_queues();
2628 }
2629
2630 /*
2631 * vm_page_unwire:
2632 *
2633 * Release one wiring of this page, potentially
2634 * enabling it to be paged again.
2635 *
2636 * The page's object and the page queues must be locked.
2637 */
2638 void
2639 vm_page_unwire(
2640 vm_page_t mem,
2641 boolean_t queueit)
2642 {
2643
2644 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2645
2646 VM_PAGE_CHECK(mem);
2647 assert(VM_PAGE_WIRED(mem));
2648 assert(mem->object != VM_OBJECT_NULL);
2649 #if DEBUG
2650 vm_object_lock_assert_exclusive(mem->object);
2651 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2652 #endif
2653 if (--mem->wire_count == 0) {
2654 assert(!mem->private && !mem->fictitious);
2655 vm_page_wire_count--;
2656 assert(mem->object->wired_page_count > 0);
2657 mem->object->wired_page_count--;
2658 assert(mem->object->resident_page_count >=
2659 mem->object->wired_page_count);
2660 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2661 OSAddAtomic(+1, &vm_page_purgeable_count);
2662 assert(vm_page_purgeable_wired_count > 0);
2663 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2664 }
2665 assert(!mem->laundry);
2666 assert(mem->object != kernel_object);
2667 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2668
2669 if (queueit == TRUE) {
2670 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2671 vm_page_deactivate(mem);
2672 } else {
2673 vm_page_activate(mem);
2674 }
2675 }
2676
2677 VM_CHECK_MEMORYSTATUS;
2678
2679 }
2680 VM_PAGE_CHECK(mem);
2681 }
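/*
 * Usage sketch (editor's illustration, not part of the original source):
 * dropping the last wiring under the same locks makes the page pageable
 * again; queueit == TRUE asks vm_page_unwire() to re-queue it (active,
 * or deactivated for an emptied purgeable object):
 *
 *	vm_object_lock(object);
 *	vm_page_lockspin_queues();
 *	vm_page_unwire(m, TRUE);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */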
2682
2683 /*
2684 * vm_page_deactivate:
2685 *
2686 * Returns the given page to the inactive list,
2687 * indicating that no physical maps have access
2688 * to this page. [Used by the physical mapping system.]
2689 *
2690 * The page queues must be locked.
2691 */
2692 void
2693 vm_page_deactivate(
2694 vm_page_t m)
2695 {
2696 vm_page_deactivate_internal(m, TRUE);
2697 }
2698
2699
2700 void
2701 vm_page_deactivate_internal(
2702 vm_page_t m,
2703 boolean_t clear_hw_reference)
2704 {
2705
2706 VM_PAGE_CHECK(m);
2707 assert(m->object != kernel_object);
2708 assert(m->phys_page != vm_page_guard_addr);
2709
2710 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2711 #if DEBUG
2712 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2713 #endif
2714 /*
2715 * This page is no longer very interesting. If it was
2716 * interesting (active or inactive/referenced), then we
2717 * clear the reference bit and (re)enter it in the
2718 * inactive queue. Note wired pages should not have
2719 * their reference bit cleared.
2720 */
2721 assert ( !(m->absent && !m->unusual));
2722
2723 if (m->gobbled) { /* can this happen? */
2724 assert( !VM_PAGE_WIRED(m));
2725
2726 if (!m->private && !m->fictitious)
2727 vm_page_wire_count--;
2728 vm_page_gobble_count--;
2729 m->gobbled = FALSE;
2730 }
2731 if (m->private || m->fictitious || (VM_PAGE_WIRED(m)))
2732 return;
2733
2734 if (!m->absent && clear_hw_reference == TRUE)
2735 pmap_clear_reference(m->phys_page);
2736
2737 m->reference = FALSE;
2738 m->no_cache = FALSE;
2739
2740 if (!m->inactive) {
2741 VM_PAGE_QUEUES_REMOVE(m);
2742
2743 assert(!m->laundry);
2744 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2745
2746 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2747 m->dirty && m->object->internal &&
2748 (m->object->purgable == VM_PURGABLE_DENY ||
2749 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2750 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2751 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2752 m->throttled = TRUE;
2753 vm_page_throttled_count++;
2754 } else {
2755 if (m->object->named && m->object->ref_count == 1) {
2756 vm_page_speculate(m, FALSE);
2757 #if DEVELOPMENT || DEBUG
2758 vm_page_speculative_recreated++;
2759 #endif
2760 } else {
2761 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2762 }
2763 }
2764 }
2765 }
2766
2767 /*
2768 * vm_page_activate:
2769 *
2770 * Put the specified page on the active list (if appropriate).
2771 *
2772 * The page queues must be locked.
2773 */
2774
2775 void
2776 vm_page_activate(
2777 register vm_page_t m)
2778 {
2779 VM_PAGE_CHECK(m);
2780 #ifdef FIXME_4778297
2781 assert(m->object != kernel_object);
2782 #endif
2783 assert(m->phys_page != vm_page_guard_addr);
2784 #if DEBUG
2785 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2786 #endif
2787 assert( !(m->absent && !m->unusual));
2788
2789 if (m->gobbled) {
2790 assert( !VM_PAGE_WIRED(m));
2791 if (!m->private && !m->fictitious)
2792 vm_page_wire_count--;
2793 vm_page_gobble_count--;
2794 m->gobbled = FALSE;
2795 }
2796 if (m->private || m->fictitious)
2797 return;
2798
2799 #if DEBUG
2800 if (m->active)
2801 panic("vm_page_activate: already active");
2802 #endif
2803
2804 if (m->speculative) {
2805 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2806 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2807 }
2808
2809 VM_PAGE_QUEUES_REMOVE(m);
2810
2811 if ( !VM_PAGE_WIRED(m)) {
2812 assert(!m->laundry);
2813 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2814 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2815 m->dirty && m->object->internal &&
2816 (m->object->purgable == VM_PURGABLE_DENY ||
2817 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2818 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2819 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2820 m->throttled = TRUE;
2821 vm_page_throttled_count++;
2822 } else {
2823 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2824 m->active = TRUE;
2825 vm_page_active_count++;
2826 }
2827 m->reference = TRUE;
2828 m->no_cache = FALSE;
2829 }
2830 VM_PAGE_CHECK(m);
2831 }
2832
2833
2834 /*
2835 * vm_page_speculate:
2836 *
2837 * Put the specified page on the speculative list (if appropriate).
2838 *
2839 * The page queues must be locked.
2840 */
2841 void
2842 vm_page_speculate(
2843 vm_page_t m,
2844 boolean_t new)
2845 {
2846 struct vm_speculative_age_q *aq;
2847
2848 VM_PAGE_CHECK(m);
2849 assert(m->object != kernel_object);
2850 assert(m->phys_page != vm_page_guard_addr);
2851 #if DEBUG
2852 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2853 #endif
2854 assert( !(m->absent && !m->unusual));
2855
2856 if (m->private || m->fictitious)
2857 return;
2858
2859 VM_PAGE_QUEUES_REMOVE(m);
2860
2861 if ( !VM_PAGE_WIRED(m)) {
2862 mach_timespec_t ts;
2863 clock_sec_t sec;
2864 clock_nsec_t nsec;
2865
2866 clock_get_system_nanotime(&sec, &nsec);
2867 ts.tv_sec = (unsigned int) sec;
2868 ts.tv_nsec = nsec;
2869
2870 if (vm_page_speculative_count == 0) {
2871
2872 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2873 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2874
2875 aq = &vm_page_queue_speculative[speculative_age_index];
2876
2877 /*
2878 * set the timer to begin a new group
2879 */
2880 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2881 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2882
2883 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2884 } else {
2885 aq = &vm_page_queue_speculative[speculative_age_index];
2886
2887 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2888
2889 speculative_age_index++;
2890
2891 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2892 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2893 if (speculative_age_index == speculative_steal_index) {
2894 speculative_steal_index = speculative_age_index + 1;
2895
2896 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2897 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2898 }
2899 aq = &vm_page_queue_speculative[speculative_age_index];
2900
2901 if (!queue_empty(&aq->age_q))
2902 vm_page_speculate_ageit(aq);
2903
2904 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
2905 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
2906
2907 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2908 }
2909 }
2910 enqueue_tail(&aq->age_q, &m->pageq);
2911 m->speculative = TRUE;
2912 vm_page_speculative_count++;
2913
2914 if (new == TRUE) {
2915 vm_object_lock_assert_exclusive(m->object);
2916
2917 m->object->pages_created++;
2918 #if DEVELOPMENT || DEBUG
2919 vm_page_speculative_created++;
2920 #endif
2921 }
2922 }
2923 VM_PAGE_CHECK(m);
2924 }
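/*
 * Worked example (editor's note, not part of the original source): with
 * vm_page_speculative_q_age_ms set to, say, 500, the aging-bin deadline
 * computed above works out to
 *
 *	age_ts.tv_sec  = 500 / 1000                          = 0
 *	age_ts.tv_nsec = (500 % 1000) * 1000 * NSEC_PER_USEC = 500000000
 *
 * and ADD_MACH_TIMESPEC() adds that to the current time, so this bin
 * stops accepting new speculative pages half a second from now.
 */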
2925
2926
2927 /*
2928 * move pages from the specified aging bin to
2929 * the speculative bin that pageout_scan claims from
2930 *
2931 * The page queues must be locked.
2932 */
2933 void
2934 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
2935 {
2936 struct vm_speculative_age_q *sq;
2937 vm_page_t t;
2938
2939 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
2940
2941 if (queue_empty(&sq->age_q)) {
2942 sq->age_q.next = aq->age_q.next;
2943 sq->age_q.prev = aq->age_q.prev;
2944
2945 t = (vm_page_t)sq->age_q.next;
2946 t->pageq.prev = &sq->age_q;
2947
2948 t = (vm_page_t)sq->age_q.prev;
2949 t->pageq.next = &sq->age_q;
2950 } else {
2951 t = (vm_page_t)sq->age_q.prev;
2952 t->pageq.next = aq->age_q.next;
2953
2954 t = (vm_page_t)aq->age_q.next;
2955 t->pageq.prev = sq->age_q.prev;
2956
2957 t = (vm_page_t)aq->age_q.prev;
2958 t->pageq.next = &sq->age_q;
2959
2960 sq->age_q.prev = aq->age_q.prev;
2961 }
2962 queue_init(&aq->age_q);
2963 }
2964
2965
2966 void
2967 vm_page_lru(
2968 vm_page_t m)
2969 {
2970 VM_PAGE_CHECK(m);
2971 assert(m->object != kernel_object);
2972 assert(m->phys_page != vm_page_guard_addr);
2973
2974 #if DEBUG
2975 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2976 #endif
2977 if (m->active || m->reference)
2978 return;
2979
2980 if (m->private || (VM_PAGE_WIRED(m)))
2981 return;
2982
2983 m->no_cache = FALSE;
2984
2985 VM_PAGE_QUEUES_REMOVE(m);
2986
2987 assert(!m->laundry);
2988 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2989
2990 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2991 }
2992
2993
2994 void
2995 vm_page_reactivate_all_throttled(void)
2996 {
2997 vm_page_t first_throttled, last_throttled;
2998 vm_page_t first_active;
2999 vm_page_t m;
3000 int extra_active_count;
3001
3002 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3003 return;
3004
3005 extra_active_count = 0;
3006 vm_page_lock_queues();
3007 if (! queue_empty(&vm_page_queue_throttled)) {
3008 /*
3009 * Switch "throttled" pages to "active".
3010 */
3011 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3012 VM_PAGE_CHECK(m);
3013 assert(m->throttled);
3014 assert(!m->active);
3015 assert(!m->inactive);
3016 assert(!m->speculative);
3017 assert(!VM_PAGE_WIRED(m));
3018
3019 extra_active_count++;
3020
3021 m->throttled = FALSE;
3022 m->active = TRUE;
3023 VM_PAGE_CHECK(m);
3024 }
3025
3026 /*
3027 * Transfer the entire throttled queue to the regular LRU page queues.
3028 * We insert it at the head of the active queue, so that these pages
3029 * get re-evaluated by the LRU algorithm first, since they've been
3030 * completely out of it until now.
3031 */
3032 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3033 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3034 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3035 if (queue_empty(&vm_page_queue_active)) {
3036 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3037 } else {
3038 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3039 }
3040 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3041 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3042 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3043
3044 #if DEBUG
3045 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3046 #endif
3047 queue_init(&vm_page_queue_throttled);
3048 /*
3049 * Adjust the global page counts.
3050 */
3051 vm_page_active_count += extra_active_count;
3052 vm_page_throttled_count = 0;
3053 }
3054 assert(vm_page_throttled_count == 0);
3055 assert(queue_empty(&vm_page_queue_throttled));
3056 vm_page_unlock_queues();
3057 }
3058
3059
3060 /*
3061 * move pages from the indicated local queue to the global active queue
3062 * it's OK to fail if we're below the hard limit and force == FALSE
3063 * the nolocks == TRUE case is to allow this function to be run on
3064 * the hibernate path
3065 */
3066
3067 void
3068 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3069 {
3070 struct vpl *lq;
3071 vm_page_t first_local, last_local;
3072 vm_page_t first_active;
3073 vm_page_t m;
3074 uint32_t count = 0;
3075
3076 if (vm_page_local_q == NULL)
3077 return;
3078
3079 lq = &vm_page_local_q[lid].vpl_un.vpl;
3080
3081 if (nolocks == FALSE) {
3082 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3083 if ( !vm_page_trylockspin_queues())
3084 return;
3085 } else
3086 vm_page_lockspin_queues();
3087
3088 VPL_LOCK(&lq->vpl_lock);
3089 }
3090 if (lq->vpl_count) {
3091 /*
3092 * Switch "local" pages to "active".
3093 */
3094 assert(!queue_empty(&lq->vpl_queue));
3095
3096 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3097 VM_PAGE_CHECK(m);
3098 assert(m->local);
3099 assert(!m->active);
3100 assert(!m->inactive);
3101 assert(!m->speculative);
3102 assert(!VM_PAGE_WIRED(m));
3103 assert(!m->throttled);
3104 assert(!m->fictitious);
3105
3106 if (m->local_id != lid)
3107 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3108
3109 m->local_id = 0;
3110 m->local = FALSE;
3111 m->active = TRUE;
3112 VM_PAGE_CHECK(m);
3113
3114 count++;
3115 }
3116 if (count != lq->vpl_count)
3117 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3118
3119 /*
3120 * Transfer the entire local queue to the regular LRU page queues.
3121 */
3122 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3123 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3124 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3125
3126 if (queue_empty(&vm_page_queue_active)) {
3127 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3128 } else {
3129 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3130 }
3131 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3132 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3133 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3134
3135 queue_init(&lq->vpl_queue);
3136 /*
3137 * Adjust the global page counts.
3138 */
3139 vm_page_active_count += lq->vpl_count;
3140 lq->vpl_count = 0;
3141 }
3142 assert(queue_empty(&lq->vpl_queue));
3143
3144 if (nolocks == FALSE) {
3145 VPL_UNLOCK(&lq->vpl_lock);
3146 vm_page_unlock_queues();
3147 }
3148 }
3149
3150 /*
3151 * vm_page_part_zero_fill:
3152 *
3153 * Zero-fill a part of the page.
3154 */
3155 void
3156 vm_page_part_zero_fill(
3157 vm_page_t m,
3158 vm_offset_t m_pa,
3159 vm_size_t len)
3160 {
3161 vm_page_t tmp;
3162
3163 VM_PAGE_CHECK(m);
3164 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3165 pmap_zero_part_page(m->phys_page, m_pa, len);
3166 #else
3167 while (1) {
3168 tmp = vm_page_grab();
3169 if (tmp == VM_PAGE_NULL) {
3170 vm_page_wait(THREAD_UNINT);
3171 continue;
3172 }
3173 break;
3174 }
3175 vm_page_zero_fill(tmp);
3176 if(m_pa != 0) {
3177 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3178 }
3179 if((m_pa + len) < PAGE_SIZE) {
3180 vm_page_part_copy(m, m_pa + len, tmp,
3181 m_pa + len, PAGE_SIZE - (m_pa + len));
3182 }
3183 vm_page_copy(tmp,m);
3184 VM_PAGE_FREE(tmp);
3185 #endif
3186
3187 }
3188
3189 /*
3190 * vm_page_zero_fill:
3191 *
3192 * Zero-fill the specified page.
3193 */
3194 void
3195 vm_page_zero_fill(
3196 vm_page_t m)
3197 {
3198 XPR(XPR_VM_PAGE,
3199 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3200 m->object, m->offset, m, 0,0);
3201
3202 VM_PAGE_CHECK(m);
3203
3204 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3205 pmap_zero_page(m->phys_page);
3206 }
3207
3208 /*
3209 * vm_page_part_copy:
3210 *
3211 * copy part of one page to another
3212 */
3213
3214 void
3215 vm_page_part_copy(
3216 vm_page_t src_m,
3217 vm_offset_t src_pa,
3218 vm_page_t dst_m,
3219 vm_offset_t dst_pa,
3220 vm_size_t len)
3221 {
3222 VM_PAGE_CHECK(src_m);
3223 VM_PAGE_CHECK(dst_m);
3224
3225 pmap_copy_part_page(src_m->phys_page, src_pa,
3226 dst_m->phys_page, dst_pa, len);
3227 }
3228
3229 /*
3230 * vm_page_copy:
3231 *
3232 * Copy one page to another
3233 *
3234 * ENCRYPTED SWAP:
3235 * The source page should not be encrypted. The caller should
3236 * make sure the page is decrypted first, if necessary.
3237 */
3238
3239 int vm_page_copy_cs_validations = 0;
3240 int vm_page_copy_cs_tainted = 0;
3241
3242 void
3243 vm_page_copy(
3244 vm_page_t src_m,
3245 vm_page_t dest_m)
3246 {
3247 XPR(XPR_VM_PAGE,
3248 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3249 src_m->object, src_m->offset,
3250 dest_m->object, dest_m->offset,
3251 0);
3252
3253 VM_PAGE_CHECK(src_m);
3254 VM_PAGE_CHECK(dest_m);
3255
3256 /*
3257 * ENCRYPTED SWAP:
3258 * The source page should not be encrypted at this point.
3259 * The destination page will therefore not contain encrypted
3260 * data after the copy.
3261 */
3262 if (src_m->encrypted) {
3263 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3264 }
3265 dest_m->encrypted = FALSE;
3266
3267 if (src_m->object != VM_OBJECT_NULL &&
3268 src_m->object->code_signed) {
3269 /*
3270 * We're copying a page from a code-signed object.
3271 * Whoever ends up mapping the copy page might care about
3272 * the original page's integrity, so let's validate the
3273 * source page now.
3274 */
3275 vm_page_copy_cs_validations++;
3276 vm_page_validate_cs(src_m);
3277 }
3278
3279 if (vm_page_is_slideable(src_m)) {
3280 boolean_t was_busy = src_m->busy;
3281 src_m->busy = TRUE;
3282 (void) vm_page_slide(src_m, 0);
3283 assert(src_m->busy);
3284 if(!was_busy) {
3285 PAGE_WAKEUP_DONE(src_m);
3286 }
3287 }
3288
3289 /*
3290 * Propagate the cs_tainted bit to the copy page. Do not propagate
3291 * the cs_validated bit.
3292 */
3293 dest_m->cs_tainted = src_m->cs_tainted;
3294 if (dest_m->cs_tainted) {
3295 vm_page_copy_cs_tainted++;
3296 }
3297 dest_m->slid = src_m->slid;
3298 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3299 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3300 }
3301
3302 #if MACH_ASSERT
3303 static void
3304 _vm_page_print(
3305 vm_page_t p)
3306 {
3307 printf("vm_page %p: \n", p);
3308 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3309 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3310 printf(" next=%p\n", p->next);
3311 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3312 printf(" wire_count=%u\n", p->wire_count);
3313
3314 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3315 (p->local ? "" : "!"),
3316 (p->inactive ? "" : "!"),
3317 (p->active ? "" : "!"),
3318 (p->pageout_queue ? "" : "!"),
3319 (p->speculative ? "" : "!"),
3320 (p->laundry ? "" : "!"));
3321 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3322 (p->free ? "" : "!"),
3323 (p->reference ? "" : "!"),
3324 (p->gobbled ? "" : "!"),
3325 (p->private ? "" : "!"),
3326 (p->throttled ? "" : "!"));
3327 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3328 (p->busy ? "" : "!"),
3329 (p->wanted ? "" : "!"),
3330 (p->tabled ? "" : "!"),
3331 (p->fictitious ? "" : "!"),
3332 (p->pmapped ? "" : "!"),
3333 (p->wpmapped ? "" : "!"));
3334 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3335 (p->pageout ? "" : "!"),
3336 (p->absent ? "" : "!"),
3337 (p->error ? "" : "!"),
3338 (p->dirty ? "" : "!"),
3339 (p->cleaning ? "" : "!"),
3340 (p->precious ? "" : "!"),
3341 (p->clustered ? "" : "!"));
3342 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3343 (p->overwriting ? "" : "!"),
3344 (p->restart ? "" : "!"),
3345 (p->unusual ? "" : "!"),
3346 (p->encrypted ? "" : "!"),
3347 (p->encrypted_cleaning ? "" : "!"));
3348 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3349 (p->list_req_pending ? "" : "!"),
3350 (p->dump_cleaning ? "" : "!"),
3351 (p->cs_validated ? "" : "!"),
3352 (p->cs_tainted ? "" : "!"),
3353 (p->no_cache ? "" : "!"));
3354 printf(" %szero_fill\n",
3355 (p->zero_fill ? "" : "!"));
3356
3357 printf("phys_page=0x%x\n", p->phys_page);
3358 }
3359
3360 /*
3361 * Check that the list of pages is ordered by
3362 * ascending physical address and has no holes.
3363 */
3364 static int
3365 vm_page_verify_contiguous(
3366 vm_page_t pages,
3367 unsigned int npages)
3368 {
3369 register vm_page_t m;
3370 unsigned int page_count;
3371 vm_offset_t prev_addr;
3372
3373 prev_addr = pages->phys_page;
3374 page_count = 1;
3375 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3376 if (m->phys_page != prev_addr + 1) {
3377 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3378 m, (long)prev_addr, m->phys_page);
3379 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3380 panic("vm_page_verify_contiguous: not contiguous!");
3381 }
3382 prev_addr = m->phys_page;
3383 ++page_count;
3384 }
3385 if (page_count != npages) {
3386 printf("pages %p actual count 0x%x but requested 0x%x\n",
3387 pages, page_count, npages);
3388 panic("vm_page_verify_contiguous: count error");
3389 }
3390 return 1;
3391 }
3392
3393
3394 /*
3395 * Check the free lists for proper length etc.
3396 */
3397 static unsigned int
3398 vm_page_verify_free_list(
3399 queue_head_t *vm_page_queue,
3400 unsigned int color,
3401 vm_page_t look_for_page,
3402 boolean_t expect_page)
3403 {
3404 unsigned int npages;
3405 vm_page_t m;
3406 vm_page_t prev_m;
3407 boolean_t found_page;
3408
3409 found_page = FALSE;
3410 npages = 0;
3411 prev_m = (vm_page_t) vm_page_queue;
3412 queue_iterate(vm_page_queue,
3413 m,
3414 vm_page_t,
3415 pageq) {
3416
3417 if (m == look_for_page) {
3418 found_page = TRUE;
3419 }
3420 if ((vm_page_t) m->pageq.prev != prev_m)
3421 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3422 color, npages, m, m->pageq.prev, prev_m);
3423 if ( ! m->busy )
3424 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3425 color, npages, m);
3426 if (color != (unsigned int) -1) {
3427 if ((m->phys_page & vm_color_mask) != color)
3428 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3429 color, npages, m, m->phys_page & vm_color_mask, color);
3430 if ( ! m->free )
3431 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3432 color, npages, m);
3433 }
3434 ++npages;
3435 prev_m = m;
3436 }
3437 if (look_for_page != VM_PAGE_NULL) {
3438 unsigned int other_color;
3439
3440 if (expect_page && !found_page) {
3441 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3442 color, npages, look_for_page, look_for_page->phys_page);
3443 _vm_page_print(look_for_page);
3444 for (other_color = 0;
3445 other_color < vm_colors;
3446 other_color++) {
3447 if (other_color == color)
3448 continue;
3449 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3450 other_color, look_for_page, FALSE);
3451 }
3452 if (color == (unsigned int) -1) {
3453 vm_page_verify_free_list(&vm_lopage_queue_free,
3454 (unsigned int) -1, look_for_page, FALSE);
3455 }
3456 panic("vm_page_verify_free_list(color=%u)\n", color);
3457 }
3458 if (!expect_page && found_page) {
3459 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3460 color, npages, look_for_page, look_for_page->phys_page);
3461 }
3462 }
3463 return npages;
3464 }
3465
3466 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3467 static void
3468 vm_page_verify_free_lists( void )
3469 {
3470 unsigned int color, npages, nlopages;
3471
3472 if (! vm_page_verify_free_lists_enabled)
3473 return;
3474
3475 npages = 0;
3476
3477 lck_mtx_lock(&vm_page_queue_free_lock);
3478
3479 for( color = 0; color < vm_colors; color++ ) {
3480 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3481 color, VM_PAGE_NULL, FALSE);
3482 }
3483 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3484 (unsigned int) -1,
3485 VM_PAGE_NULL, FALSE);
3486 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3487 panic("vm_page_verify_free_lists: "
3488 "npages %u free_count %d nlopages %u lo_free_count %u",
3489 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3490
3491 lck_mtx_unlock(&vm_page_queue_free_lock);
3492 }
3493
3494 void
3495 vm_page_queues_assert(
3496 vm_page_t mem,
3497 int val)
3498 {
3499 if (mem->free + mem->active + mem->inactive + mem->speculative +
3500 mem->throttled + mem->pageout_queue > (val)) {
3501 _vm_page_print(mem);
3502 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3503 }
3504 if (VM_PAGE_WIRED(mem)) {
3505 assert(!mem->active);
3506 assert(!mem->inactive);
3507 assert(!mem->speculative);
3508 assert(!mem->throttled);
3509 }
3510 }
3511 #endif /* MACH_ASSERT */
3512
3513
3514 /*
3515 * CONTIGUOUS PAGE ALLOCATION
3516 *
3517 * Find a region large enough to contain at least n pages
3518 * of contiguous physical memory.
3519 *
3520 * This is done by traversing the vm_page_t array in a linear fashion
3521 * we assume that the vm_page_t array has the available physical pages in an
3522 * ordered, ascending list... this is currently true of all our implementations
3523 * and must remain so... there can be 'holes' in the array... we also can
3524 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3525 * which used to happen via 'vm_page_convert'... that function was no longer
3526 * being called and was removed...
3527 *
3528 * The basic flow consists of stabilizing some of the interesting state of
3529 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3530 * sweep at the beginning of the array looking for pages that meet our criteria
3531 * for a 'stealable' page... currently we are pretty conservative... if the page
3532 * meets these criteria and is physically contiguous to the previous page in the 'run'
3533 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3534 * and start to develop a new run... if at this point we've already considered
3535 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3536 * and mutex_pause (which will yield the processor), to keep the latency low with respect
3537 * to other threads trying to acquire free pages (or move pages from q to q),
3538 * and then continue from the spot we left off... we only make 1 pass through the
3539 * array. Once we have a 'run' that is long enough, we'll go into the loop
3540 * which steals the pages from the queues they're currently on... pages on the free
3541 * queue can be stolen directly... pages that are on any of the other queues
3542 * must be removed from the object they are tabled on... this requires taking the
3543 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3544 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3545 * dump the pages we've currently stolen back to the free list, and pick up our
3546 * scan from the point where we aborted the 'current' run.
3547 *
3548 *
3549 * Requirements:
3550 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3551 *
3552 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3553 *
3554 * Algorithm:
3555 */
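/*
 * Parameter note (editor's sketch, not part of the original source):
 * pnum_mask expresses the required alignment of the run's starting
 * physical page; a start candidate is rejected unless
 * (phys_page & pnum_mask) == 0.  For example, assuming 4KB pages, a
 * caller that needs the run to start on a 64KB physical boundary would
 * pass pnum_mask == 0xF (16 pages per 64KB).
 */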
3556
3557 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3558
3559
3560 #define RESET_STATE_OF_RUN() \
3561 MACRO_BEGIN \
3562 prevcontaddr = -2; \
3563 start_pnum = -1; \
3564 free_considered = 0; \
3565 substitute_needed = 0; \
3566 npages = 0; \
3567 MACRO_END
3568
3569 /*
3570 * Can we steal in-use (i.e. not free) pages when searching for
3571 * physically-contiguous pages ?
3572 */
3573 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3574
3575 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3576 #if DEBUG
3577 int vm_page_find_contig_debug = 0;
3578 #endif
3579
3580 static vm_page_t
3581 vm_page_find_contiguous(
3582 unsigned int contig_pages,
3583 ppnum_t max_pnum,
3584 ppnum_t pnum_mask,
3585 boolean_t wire,
3586 int flags)
3587 {
3588 vm_page_t m = NULL;
3589 ppnum_t prevcontaddr;
3590 ppnum_t start_pnum;
3591 unsigned int npages, considered, scanned;
3592 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3593 unsigned int idx_last_contig_page_found = 0;
3594 int free_considered, free_available;
3595 int substitute_needed;
3596 boolean_t wrapped;
3597 #if DEBUG
3598 clock_sec_t tv_start_sec, tv_end_sec;
3599 clock_usec_t tv_start_usec, tv_end_usec;
3600 #endif
3601 #if MACH_ASSERT
3602 int yielded = 0;
3603 int dumped_run = 0;
3604 int stolen_pages = 0;
3605 #endif
3606
3607 if (contig_pages == 0)
3608 return VM_PAGE_NULL;
3609
3610 #if MACH_ASSERT
3611 vm_page_verify_free_lists();
3612 #endif
3613 #if DEBUG
3614 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3615 #endif
3616 vm_page_lock_queues();
3617 lck_mtx_lock(&vm_page_queue_free_lock);
3618
3619 RESET_STATE_OF_RUN();
3620
3621 scanned = 0;
3622 considered = 0;
3623 free_available = vm_page_free_count - vm_page_free_reserved;
3624
3625 wrapped = FALSE;
3626
3627 if(flags & KMA_LOMEM)
3628 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3629 else
3630 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3631
3632 orig_last_idx = idx_last_contig_page_found;
3633 last_idx = orig_last_idx;
3634
3635 for (page_idx = last_idx, start_idx = last_idx;
3636 npages < contig_pages && page_idx < vm_pages_count;
3637 page_idx++) {
3638 retry:
3639 if (wrapped &&
3640 npages == 0 &&
3641 page_idx >= orig_last_idx) {
3642 /*
3643 * We're back where we started and we haven't
3644 * found any suitable contiguous range. Let's
3645 * give up.
3646 */
3647 break;
3648 }
3649 scanned++;
3650 m = &vm_pages[page_idx];
3651
3652 assert(!m->fictitious);
3653 assert(!m->private);
3654
3655 if (max_pnum && m->phys_page > max_pnum) {
3656 /* no more low pages... */
3657 break;
3658 }
3659 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3660 /*
3661 * not aligned
3662 */
3663 RESET_STATE_OF_RUN();
3664
3665 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3666 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3667 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3668 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
3669 m->pageout) {
3670 /*
3671 * page is in a transient state
3672 * or a state we don't want to deal
3673 * with, so don't consider it which
3674 * means starting a new run
3675 */
3676 RESET_STATE_OF_RUN();
3677
3678 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3679 /*
3680 * page needs to be on one of our queues
3681 * in order for it to be stable behind the
3682 * locks we hold at this point...
3683 * if not, don't consider it which
3684 * means starting a new run
3685 */
3686 RESET_STATE_OF_RUN();
3687
3688 } else if (!m->free && (!m->tabled || m->busy)) {
3689 /*
3690 * pages on the free list are always 'busy'
3691 * so we couldn't test for 'busy' in the check
3692 * for the transient states... pages that are
3693 * 'free' are never 'tabled', so we also couldn't
3694 * test for 'tabled'. So we check here to make
3695 * sure that a non-free page is not busy and is
3696 * tabled on an object...
3697 * if not, don't consider it which
3698 * means starting a new run
3699 */
3700 RESET_STATE_OF_RUN();
3701
3702 } else {
3703 if (m->phys_page != prevcontaddr + 1) {
3704 if ((m->phys_page & pnum_mask) != 0) {
3705 RESET_STATE_OF_RUN();
3706 goto did_consider;
3707 } else {
3708 npages = 1;
3709 start_idx = page_idx;
3710 start_pnum = m->phys_page;
3711 }
3712 } else {
3713 npages++;
3714 }
3715 prevcontaddr = m->phys_page;
3716
3717 VM_PAGE_CHECK(m);
3718 if (m->free) {
3719 free_considered++;
3720 } else {
3721 /*
3722 * This page is not free.
3723 * If we can't steal used pages,
3724 * we have to give up this run
3725 * and keep looking.
3726 * Otherwise, we might need to
3727 * move the contents of this page
3728 * into a substitute page.
3729 */
3730 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3731 if (m->pmapped || m->dirty) {
3732 substitute_needed++;
3733 }
3734 #else
3735 RESET_STATE_OF_RUN();
3736 #endif
3737 }
3738
3739 if ((free_considered + substitute_needed) > free_available) {
3740 /*
3741 * if we let this run continue
3742 * we will end up dropping the vm_page_free_count
3743 * below the reserve limit... we need to abort
3744 * this run, but we can at least re-consider this
3745 * page... thus the jump back to 'retry'
3746 */
3747 RESET_STATE_OF_RUN();
3748
3749 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3750 considered++;
3751 goto retry;
3752 }
3753 /*
3754 * free_available == 0
3755 * so can't consider any free pages... if
3756 * we went to retry in this case, we'd
3757 * get stuck looking at the same page
3758 * w/o making any forward progress...
3759 * we also want to take this path if we've already
3760 * reached our limit that controls the lock latency
3761 */
3762 }
3763 }
3764 did_consider:
3765 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3766
3767 lck_mtx_unlock(&vm_page_queue_free_lock);
3768 vm_page_unlock_queues();
3769
3770 mutex_pause(0);
3771
3772 vm_page_lock_queues();
3773 lck_mtx_lock(&vm_page_queue_free_lock);
3774
3775 RESET_STATE_OF_RUN();
3776 /*
3777 * reset our free page limit since we
3778 * dropped the lock protecting the vm_page_free_queue
3779 */
3780 free_available = vm_page_free_count - vm_page_free_reserved;
3781 considered = 0;
3782 #if MACH_ASSERT
3783 yielded++;
3784 #endif
3785 goto retry;
3786 }
3787 considered++;
3788 }
3789 m = VM_PAGE_NULL;
3790
3791 if (npages != contig_pages) {
3792 if (!wrapped) {
3793 /*
3794 * We didn't find a contiguous range but we didn't
3795 * start from the very first page.
3796 * Start again from the very first page.
3797 */
3798 RESET_STATE_OF_RUN();
3799 if( flags & KMA_LOMEM)
3800 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3801 else
3802 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3803 last_idx = 0;
3804 page_idx = last_idx;
3805 wrapped = TRUE;
3806 goto retry;
3807 }
3808 lck_mtx_unlock(&vm_page_queue_free_lock);
3809 } else {
3810 vm_page_t m1;
3811 vm_page_t m2;
3812 unsigned int cur_idx;
3813 unsigned int tmp_start_idx;
3814 vm_object_t locked_object = VM_OBJECT_NULL;
3815 boolean_t abort_run = FALSE;
3816
3817 assert(page_idx - start_idx == contig_pages);
3818
3819 tmp_start_idx = start_idx;
3820
3821 /*
3822 * first pass through to pull the free pages
3823 * off of the free queue so that in case we
3824 * need substitute pages, we won't grab any
3825 * of the free pages in the run... we clear
3826 * the 'free' bit as we go, and even in
3827 * an abort_run case, we'll collect all of the
3828 * free pages in this run and return them to the free list
3829 */
3830 while (start_idx < page_idx) {
3831
3832 m1 = &vm_pages[start_idx++];
3833
3834 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3835 assert(m1->free);
3836 #endif
3837
3838 if (m1->free) {
3839 unsigned int color;
3840
3841 color = m1->phys_page & vm_color_mask;
3842 #if MACH_ASSERT
3843 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
3844 #endif
3845 queue_remove(&vm_page_queue_free[color],
3846 m1,
3847 vm_page_t,
3848 pageq);
3849 m1->pageq.next = NULL;
3850 m1->pageq.prev = NULL;
3851 #if MACH_ASSERT
3852 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
3853 #endif
3854 /*
3855 * Clear the "free" bit so that this page
3856 * does not get considered for another
3857 * concurrent physically-contiguous allocation.
3858 */
3859 m1->free = FALSE;
3860 assert(m1->busy);
3861
3862 vm_page_free_count--;
3863 }
3864 }
3865 /*
3866 * adjust global freelist counts
3867 */
3868 if (vm_page_free_count < vm_page_free_count_minimum)
3869 vm_page_free_count_minimum = vm_page_free_count;
3870
3871 if( flags & KMA_LOMEM)
3872 vm_page_lomem_find_contiguous_last_idx = page_idx;
3873 else
3874 vm_page_find_contiguous_last_idx = page_idx;
3875
3876 /*
3877 * we can drop the free queue lock at this point since
3878 * we've pulled any 'free' candidates off of the list
3879 * we need it dropped so that we can do a vm_page_grab
3880 * when substituting for pmapped/dirty pages
3881 */
3882 lck_mtx_unlock(&vm_page_queue_free_lock);
3883
3884 start_idx = tmp_start_idx;
3885 cur_idx = page_idx - 1;
3886
3887 while (start_idx++ < page_idx) {
3888 /*
3889 * must go through the list from back to front
3890 * so that the page list is created in the
3891 * correct order - low -> high phys addresses
3892 */
3893 m1 = &vm_pages[cur_idx--];
3894
3895 assert(!m1->free);
3896 if (m1->object == VM_OBJECT_NULL) {
3897 /*
3898 * page has already been removed from
3899 * the free list in the 1st pass
3900 */
3901 assert(m1->offset == (vm_object_offset_t) -1);
3902 assert(m1->busy);
3903 assert(!m1->wanted);
3904 assert(!m1->laundry);
3905 } else {
3906 vm_object_t object;
3907
3908 if (abort_run == TRUE)
3909 continue;
3910
3911 object = m1->object;
3912
3913 if (object != locked_object) {
3914 if (locked_object) {
3915 vm_object_unlock(locked_object);
3916 locked_object = VM_OBJECT_NULL;
3917 }
3918 if (vm_object_lock_try(object))
3919 locked_object = object;
3920 }
3921 if (locked_object == VM_OBJECT_NULL ||
3922 (VM_PAGE_WIRED(m1) || m1->gobbled ||
3923 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3924 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3925 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3926
3927 if (locked_object) {
3928 vm_object_unlock(locked_object);
3929 locked_object = VM_OBJECT_NULL;
3930 }
3931 tmp_start_idx = cur_idx;
3932 abort_run = TRUE;
3933 continue;
3934 }
3935 if (m1->pmapped || m1->dirty) {
3936 int refmod;
3937 vm_object_offset_t offset;
3938
3939 m2 = vm_page_grab();
3940
3941 if (m2 == VM_PAGE_NULL) {
3942 if (locked_object) {
3943 vm_object_unlock(locked_object);
3944 locked_object = VM_OBJECT_NULL;
3945 }
3946 tmp_start_idx = cur_idx;
3947 abort_run = TRUE;
3948 continue;
3949 }
3950 if (m1->pmapped)
3951 refmod = pmap_disconnect(m1->phys_page);
3952 else
3953 refmod = 0;
3954 vm_page_copy(m1, m2);
3955
3956 m2->reference = m1->reference;
3957 m2->dirty = m1->dirty;
3958
3959 if (refmod & VM_MEM_REFERENCED)
3960 m2->reference = TRUE;
3961 if (refmod & VM_MEM_MODIFIED)
3962 m2->dirty = TRUE;
3963 offset = m1->offset;
3964
3965 /*
3966 * completely cleans up the state
3967 * of the page so that it is ready
3968 * to be put onto the free list, or
3969 * for this purpose it looks like it
3970 * just came off of the free list
3971 */
3972 vm_page_free_prepare(m1);
3973
3974 /*
3975 * make sure we clear the ref/mod state
3976 * from the pmap layer... else we risk
3977 * inheriting state from the last time
3978 * this page was used...
3979 */
3980 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
3981 /*
3982 * now put the substitute page on the object
3983 */
3984 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);
3985
3986 if (m2->reference)
3987 vm_page_activate(m2);
3988 else
3989 vm_page_deactivate(m2);
3990
3991 PAGE_WAKEUP_DONE(m2);
3992
3993 } else {
3994 /*
3995 * completely cleans up the state
3996 * of the page so that it is ready
3997 * to be put onto the free list, or
3998 * for this purpose it looks like it
3999 * just came off of the free list
4000 */
4001 vm_page_free_prepare(m1);
4002 }
4003 #if MACH_ASSERT
4004 stolen_pages++;
4005 #endif
4006 }
4007 m1->pageq.next = (queue_entry_t) m;
4008 m1->pageq.prev = NULL;
4009 m = m1;
4010 }
4011 if (locked_object) {
4012 vm_object_unlock(locked_object);
4013 locked_object = VM_OBJECT_NULL;
4014 }
4015
4016 if (abort_run == TRUE) {
4017 if (m != VM_PAGE_NULL) {
4018 vm_page_free_list(m, FALSE);
4019 }
4020 #if MACH_ASSERT
4021 dumped_run++;
4022 #endif
4023 /*
4024 * want the index of the last
4025 * page in this run that was
4026 * successfully 'stolen', so back
4027 * it up 1 for the auto-decrement on use
4028 * and 1 more to bump back over this page
4029 */
4030 page_idx = tmp_start_idx + 2;
4031 if (page_idx >= vm_pages_count) {
4032 if (wrapped)
4033 goto done_scanning;
4034 page_idx = last_idx = 0;
4035 wrapped = TRUE;
4036 }
4037 abort_run = FALSE;
4038
4039 /*
4040 * this run didn't pan out... reset our state and
4041 * retry the scan from just beyond the page that
4042 * caused the abort (or from page 0 if we had to wrap)
4043 */
4044 RESET_STATE_OF_RUN();
4045
4046 if( flags & KMA_LOMEM)
4047 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4048 else
4049 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4050
4051 last_idx = page_idx;
4052
4053 lck_mtx_lock(&vm_page_queue_free_lock);
4054 /*
4055 * reset our free page limit since we
4056 * dropped the lock protecting the vm_page_free_queue
4057 */
4058 free_available = vm_page_free_count - vm_page_free_reserved;
4059 goto retry;
4060 }
4061
4062 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4063
4064 if (wire == TRUE)
4065 m1->wire_count++;
4066 else
4067 m1->gobbled = TRUE;
4068 }
4069 if (wire == FALSE)
4070 vm_page_gobble_count += npages;
4071
4072 /*
4073 * gobbled pages are also counted as wired pages
4074 */
4075 vm_page_wire_count += npages;
4076
4077 assert(vm_page_verify_contiguous(m, npages));
4078 }
4079 done_scanning:
4080 vm_page_unlock_queues();
4081
4082 #if DEBUG
4083 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4084
4085 tv_end_sec -= tv_start_sec;
4086 if (tv_end_usec < tv_start_usec) {
4087 tv_end_sec--;
4088 tv_end_usec += 1000000;
4089 }
4090 tv_end_usec -= tv_start_usec;
4091 if (tv_end_usec >= 1000000) {
4092 tv_end_sec++;
4093 tv_end_usec -= 1000000;
4094 }
4095 if (vm_page_find_contig_debug) {
4096 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4097 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4098 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4099 scanned, yielded, dumped_run, stolen_pages);
4100 }
4101
4102 #endif
4103 #if MACH_ASSERT
4104 vm_page_verify_free_lists();
4105 #endif
4106 return m;
4107 }
4108
4109 /*
4110 * Allocate a list of contiguous, wired pages.
4111 */
4112 kern_return_t
4113 cpm_allocate(
4114 vm_size_t size,
4115 vm_page_t *list,
4116 ppnum_t max_pnum,
4117 ppnum_t pnum_mask,
4118 boolean_t wire,
4119 int flags)
4120 {
4121 vm_page_t pages;
4122 unsigned int npages;
4123
4124 if (size % PAGE_SIZE != 0)
4125 return KERN_INVALID_ARGUMENT;
4126
4127 npages = (unsigned int) (size / PAGE_SIZE);
4128 if (npages != size / PAGE_SIZE) {
4129 /* 32-bit overflow */
4130 return KERN_INVALID_ARGUMENT;
4131 }
4132
4133 /*
4134 * Obtain a pointer to a subset of the free
4135 * list large enough to satisfy the request;
4136 * the region will be physically contiguous.
4137 */
4138 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4139
4140 if (pages == VM_PAGE_NULL)
4141 return KERN_NO_SPACE;
4142 /*
4143 * determine need for wakeups
4144 */
4145 if ((vm_page_free_count < vm_page_free_min) ||
4146 ((vm_page_free_count < vm_page_free_target) &&
4147 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4148 thread_wakeup((event_t) &vm_page_free_wanted);
4149
4150 VM_CHECK_MEMORYSTATUS;
4151
4152 /*
4153 * The CPM pages should now be available and
4154 * ordered by ascending physical address.
4155 */
4156 assert(vm_page_verify_contiguous(pages, npages));
4157
4158 *list = pages;
4159 return KERN_SUCCESS;
4160 }
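
/*
 * Compiled-out sketch of a cpm_allocate() caller; the page count and the
 * zero constraints below are hypothetical example values, not taken from
 * a real caller. size must be a whole number of pages, and on success
 * *list is a chain of pages ordered by ascending physical address.
 */
#if 0
static kern_return_t
cpm_allocate_example(void)
{
vm_page_t pages;
kern_return_t kr;

kr = cpm_allocate((vm_size_t)(16 * PAGE_SIZE), /* 16 contiguous pages (example) */
&pages, /* returned page list */
(ppnum_t) 0, /* max_pnum (example value) */
(ppnum_t) 0, /* pnum_mask (example value) */
TRUE, /* wire the pages */
0); /* flags */
return (kr);
}
#endif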
4161
4162
4163 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4164
4165 /*
4166 * when working on a 'run' of pages, it is necessary to hold
4167 * the vm_page_queue_lock (a hot global lock) for certain operations
4168 * on the page... however, the majority of the work can be done
4169 * while merely holding the object lock... in fact there are certain
4170 * collections of pages that don't require any work brokered by the
4171 * vm_page_queue_lock... to mitigate the time spent behind the global
4172 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4173 * while doing all of the work that doesn't require the vm_page_queue_lock...
4174 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4175 * necessary work for each page... we will grab the busy bit on the page
4176 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4177 * if it can't immediately take the vm_page_queue_lock in order to compete
4178 * for the locks in the same order that vm_pageout_scan takes them.
4179 * the operation names are modeled after the names of the routines that
4180 * need to be called in order to make the changes very obvious in the
4181 * original loop
4182 */
4183
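/*
 * Compiled-out sketch of the calling pattern described above; the batch
 * size, the DW_ masks chosen and the function name are illustrative
 * assumptions, not a real caller. The caller is assumed to already hold
 * the object lock, since vm_page_do_delayed_work may need to drop and
 * retake it to keep the lock ordering described below.
 */
#if 0
static void
vm_page_delayed_work_example(vm_object_t object, vm_page_t page_list)
{
struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
struct vm_page_delayed_work *dwp;
int dw_count;
vm_page_t m;

dwp = &dw_array[0];
dw_count = 0;

for (m = page_list; m != VM_PAGE_NULL; m = (vm_page_t) m->pageq.next) {
/*
 * per-page work that only needs the object lock goes here...
 * then record the queue-related work for later
 */
dwp->dw_m = m;
dwp->dw_mask = DW_clear_busy | DW_PAGE_WAKEUP;
dwp++;
dw_count++;

if (dw_count >= DEFAULT_DELAYED_WORK_LIMIT) {
/*
 * flush the batch... this routine takes (and drops)
 * the vm_page_queue_lock on our behalf
 */
vm_page_do_delayed_work(object, &dw_array[0], dw_count);
dwp = &dw_array[0];
dw_count = 0;
}
}
if (dw_count)
vm_page_do_delayed_work(object, &dw_array[0], dw_count);
}
#endif
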
4184 void
4185 vm_page_do_delayed_work(
4186 vm_object_t object,
4187 struct vm_page_delayed_work *dwp,
4188 int dw_count)
4189 {
4190 int j;
4191 vm_page_t m;
4192 vm_page_t local_free_q = VM_PAGE_NULL;
4193 boolean_t dropped_obj_lock = FALSE;
4194
4195 /*
4196 * pageout_scan takes the vm_page_lock_queues first
4197 * then tries for the object lock... to avoid what
4198 * is effectively a lock inversion, we'll go to the
4199 * trouble of taking them in that same order... otherwise
4200 * if this object contains the majority of the pages resident
4201 * in the UBC (or a small set of large objects actively being
4202 * worked on contain the majority of the pages), we could
4203 * cause the pageout_scan thread to 'starve' in its attempt
4204 * to find pages to move to the free queue, since it has to
4205 * successfully acquire the object lock of any candidate page
4206 * before it can steal/clean it.
4207 */
4208 if (!vm_page_trylockspin_queues()) {
4209 vm_object_unlock(object);
4210
4211 vm_page_lockspin_queues();
4212
4213 for (j = 0; ; j++) {
4214 if (!vm_object_lock_avoid(object) &&
4215 _vm_object_lock_try(object))
4216 break;
4217 vm_page_unlock_queues();
4218 mutex_pause(j);
4219 vm_page_lockspin_queues();
4220 }
4221 dropped_obj_lock = TRUE;
4222 }
4223 for (j = 0; j < dw_count; j++, dwp++) {
4224
4225 m = dwp->dw_m;
4226
4227 if (dwp->dw_mask & DW_set_list_req_pending) {
4228 m->list_req_pending = TRUE;
4229
4230 if (dropped_obj_lock == TRUE) {
4231 /*
4232 * need to make sure anyone that might have
4233 * blocked on busy == TRUE when we dropped
4234 * the object lock gets a chance to re-evaluate
4235 * its state since we have several places
4236 * where we avoid potential deadlocks with
4237 * the filesystem by stealing pages with
4238 * list_req_pending == TRUE and busy == TRUE
4239 */
4240 dwp->dw_mask |= DW_PAGE_WAKEUP;
4241 }
4242 }
4243 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4244 vm_pageout_throttle_up(m);
4245
4246 if (dwp->dw_mask & DW_vm_page_wire)
4247 vm_page_wire(m);
4248 else if (dwp->dw_mask & DW_vm_page_unwire) {
4249 boolean_t queueit;
4250
4251 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4252
4253 vm_page_unwire(m, queueit);
4254 }
4255 if (dwp->dw_mask & DW_vm_page_free) {
4256 vm_page_free_prepare_queues(m);
4257
4258 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4259 /*
4260 * Add this page to our list of reclaimed pages,
4261 * to be freed later.
4262 */
4263 m->pageq.next = (queue_entry_t) local_free_q;
4264 local_free_q = m;
4265 } else {
4266 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4267 vm_page_deactivate_internal(m, FALSE);
4268 else if (dwp->dw_mask & DW_vm_page_activate) {
4269 if (m->active == FALSE) {
4270 vm_page_activate(m);
4271 }
4272 }
4273 else if (dwp->dw_mask & DW_vm_page_speculate)
4274 vm_page_speculate(m, TRUE);
4275 else if (dwp->dw_mask & DW_vm_page_lru)
4276 vm_page_lru(m);
4277 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE)
4278 VM_PAGE_QUEUES_REMOVE(m);
4279
4280 if (dwp->dw_mask & DW_set_reference)
4281 m->reference = TRUE;
4282 else if (dwp->dw_mask & DW_clear_reference)
4283 m->reference = FALSE;
4284
4285 if (dwp->dw_mask & DW_move_page) {
4286 VM_PAGE_QUEUES_REMOVE(m);
4287
4288 assert(!m->laundry);
4289 assert(m->object != kernel_object);
4290 assert(m->pageq.next == NULL &&
4291 m->pageq.prev == NULL);
4292
4293 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4294 }
4295 if (dwp->dw_mask & DW_clear_busy)
4296 m->busy = FALSE;
4297
4298 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4299 PAGE_WAKEUP(m);
4300 }
4301 }
4302 vm_page_unlock_queues();
4303
4304 if (local_free_q)
4305 vm_page_free_list(local_free_q, TRUE);
4306
4307 VM_CHECK_MEMORYSTATUS;
4308
4309 }
4310
4311
4312
4313
4314 void vm_check_memorystatus()
4315 {
4316 #if CONFIG_EMBEDDED
4317 static boolean_t in_critical = FALSE;
4318 static unsigned int last_memorystatus = 0;
4319 unsigned int pages_avail;
4320
4321 if (!kern_memorystatus_delta) {
4322 return;
4323 }
4324
4325 pages_avail = (vm_page_active_count +
4326 vm_page_inactive_count +
4327 vm_page_speculative_count +
4328 vm_page_free_count +
4329 (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) ? 0 : vm_page_purgeable_count));
4330 if ( (!in_critical && (pages_avail < kern_memorystatus_delta)) ||
4331 (pages_avail >= (last_memorystatus + kern_memorystatus_delta)) ||
4332 (last_memorystatus >= (pages_avail + kern_memorystatus_delta)) ) {
4333 kern_memorystatus_level = pages_avail * 100 / atop_64(max_mem);
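/*
 * worked example (hypothetical numbers): with 512MB of max_mem
 * (131072 4K pages) and pages_avail == 32768, the level becomes
 * 32768 * 100 / 131072 == 25, i.e. roughly a quarter of memory
 * is still available
 */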
4334 last_memorystatus = pages_avail;
4335
4336 thread_wakeup((event_t)&kern_memorystatus_wakeup);
4337
4338 in_critical = (pages_avail < kern_memorystatus_delta) ? TRUE : FALSE;
4339 }
4340 #endif
4341 }
4342
4343 kern_return_t
4344 vm_page_alloc_list(
4345 int page_count,
4346 int flags,
4347 vm_page_t *list)
4348 {
4349 vm_page_t lo_page_list = VM_PAGE_NULL;
4350 vm_page_t mem;
4351 int i;
4352
4353 if ( !(flags & KMA_LOMEM))
4354 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4355
4356 for (i = 0; i < page_count; i++) {
4357
4358 mem = vm_page_grablo();
4359
4360 if (mem == VM_PAGE_NULL) {
4361 if (lo_page_list)
4362 vm_page_free_list(lo_page_list, FALSE);
4363
4364 *list = VM_PAGE_NULL;
4365
4366 return (KERN_RESOURCE_SHORTAGE);
4367 }
4368 mem->pageq.next = (queue_entry_t) lo_page_list;
4369 lo_page_list = mem;
4370 }
4371 *list = lo_page_list;
4372
4373 return (KERN_SUCCESS);
4374 }
4375
4376 void
4377 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4378 {
4379 page->offset = offset;
4380 }
4381
4382 vm_page_t
4383 vm_page_get_next(vm_page_t page)
4384 {
4385 return ((vm_page_t) page->pageq.next);
4386 }
4387
4388 vm_object_offset_t
4389 vm_page_get_offset(vm_page_t page)
4390 {
4391 return (page->offset);
4392 }
4393
4394 ppnum_t
4395 vm_page_get_phys_page(vm_page_t page)
4396 {
4397 return (page->phys_page);
4398 }
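
/*
 * Compiled-out illustration of the accessors above: walking a page list
 * (for instance one returned by vm_page_alloc_list) and reading each
 * page's physical page number. The function name is hypothetical.
 */
#if 0
static void
vm_page_list_walk_example(vm_page_t list)
{
vm_page_t p;

for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p))
printf("page %p has phys_page 0x%x\n", p, vm_page_get_phys_page(p));
}
#endif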
4399
4400
4401 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4402
4403 #if HIBERNATION
4404
4405 static vm_page_t hibernate_gobble_queue;
4406
4407 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4408
4409 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4410 static int hibernate_flush_dirty_pages(void);
4411 static int hibernate_flush_queue(queue_head_t *, int);
4412 static void hibernate_dirty_page(vm_page_t);
4413
4414 void hibernate_flush_wait(void);
4415 void hibernate_mark_in_progress(void);
4416 void hibernate_clear_in_progress(void);
4417
4418
4419 struct hibernate_statistics {
4420 int hibernate_considered;
4421 int hibernate_reentered_on_q;
4422 int hibernate_found_dirty;
4423 int hibernate_skipped_cleaning;
4424 int hibernate_skipped_transient;
4425 int hibernate_skipped_precious;
4426 int hibernate_queue_nolock;
4427 int hibernate_queue_paused;
4428 int hibernate_throttled;
4429 int hibernate_throttle_timeout;
4430 int hibernate_drained;
4431 int hibernate_drain_timeout;
4432 int cd_lock_failed;
4433 int cd_found_precious;
4434 int cd_found_wired;
4435 int cd_found_busy;
4436 int cd_found_unusual;
4437 int cd_found_cleaning;
4438 int cd_found_laundry;
4439 int cd_found_dirty;
4440 int cd_local_free;
4441 int cd_total_free;
4442 int cd_vm_page_wire_count;
4443 int cd_pages;
4444 int cd_discarded;
4445 int cd_count_wire;
4446 } hibernate_stats;
4447
4448
4449
4450 static int
4451 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4452 {
4453 wait_result_t wait_result;
4454
4455 vm_page_lock_queues();
4456
4457 while (q->pgo_laundry) {
4458
4459 q->pgo_draining = TRUE;
4460
4461 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4462
4463 vm_page_unlock_queues();
4464
4465 wait_result = thread_block(THREAD_CONTINUE_NULL);
4466
4467 if (wait_result == THREAD_TIMED_OUT) {
4468 hibernate_stats.hibernate_drain_timeout++;
4469 return (1);
4470 }
4471 vm_page_lock_queues();
4472
4473 hibernate_stats.hibernate_drained++;
4474 }
4475 vm_page_unlock_queues();
4476
4477 return (0);
4478 }
4479
4480 static void
4481 hibernate_dirty_page(vm_page_t m)
4482 {
4483 vm_object_t object = m->object;
4484 struct vm_pageout_queue *q;
4485
4486 #if DEBUG
4487 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4488 #endif
4489 vm_object_lock_assert_exclusive(object);
4490
4491 /*
4492 * protect the object from collapse -
4493 * locking in the object's paging_offset.
4494 */
4495 vm_object_paging_begin(object);
4496
4497 m->list_req_pending = TRUE;
4498 m->cleaning = TRUE;
4499 m->busy = TRUE;
4500
4501 if (object->internal == TRUE)
4502 q = &vm_pageout_queue_internal;
4503 else
4504 q = &vm_pageout_queue_external;
4505
4506 /*
4507 * pgo_laundry count is tied to the laundry bit
4508 */
4509 m->laundry = TRUE;
4510 q->pgo_laundry++;
4511
4512 m->pageout_queue = TRUE;
4513 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
4514
4515 if (q->pgo_idle == TRUE) {
4516 q->pgo_idle = FALSE;
4517 thread_wakeup((event_t) &q->pgo_pending);
4518 }
4519 }
4520
4521 static int
4522 hibernate_flush_queue(queue_head_t *q, int qcount)
4523 {
4524 vm_page_t m;
4525 vm_object_t l_object = NULL;
4526 vm_object_t m_object = NULL;
4527 int refmod_state = 0;
4528 int try_failed_count = 0;
4529 int retval = 0;
4530 int current_run = 0;
4531 struct vm_pageout_queue *iq;
4532 struct vm_pageout_queue *eq;
4533 struct vm_pageout_queue *tq;
4534
4535
4536 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4537
4538 iq = &vm_pageout_queue_internal;
4539 eq = &vm_pageout_queue_external;
4540
4541 vm_page_lock_queues();
4542
4543 while (qcount && !queue_empty(q)) {
4544
4545 if (current_run++ == 1000) {
4546 if (hibernate_should_abort()) {
4547 retval = 1;
4548 break;
4549 }
4550 current_run = 0;
4551 }
4552
4553 m = (vm_page_t) queue_first(q);
4554 m_object = m->object;
4555
4556 /*
4557 * check to see if we currently are working
4558 * with the same object... if so, we've
4559 * already got the lock
4560 */
4561 if (m_object != l_object) {
4562 /*
4563 * the object associated with candidate page is
4564 * different from the one we were just working
4565 * with... dump the lock if we still own it
4566 */
4567 if (l_object != NULL) {
4568 vm_object_unlock(l_object);
4569 l_object = NULL;
4570 }
4571 /*
4572 * Try to lock object; since we've already got the
4573 * page queues lock, we can only 'try' for this one.
4574 * if the 'try' fails, we need to do a mutex_pause
4575 * to allow the owner of the object lock a chance to
4576 * run...
4577 */
4578 if ( !vm_object_lock_try_scan(m_object)) {
4579
4580 if (try_failed_count > 20) {
4581 hibernate_stats.hibernate_queue_nolock++;
4582
4583 goto reenter_pg_on_q;
4584 }
4585 vm_pageout_scan_wants_object = m_object;
4586
4587 vm_page_unlock_queues();
4588 mutex_pause(try_failed_count++);
4589 vm_page_lock_queues();
4590
4591 hibernate_stats.hibernate_queue_paused++;
4592 continue;
4593 } else {
4594 l_object = m_object;
4595 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4596 }
4597 }
4598 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->busy || m->absent || m->error) {
4599 /*
4600 * page is not to be cleaned
4601 * put it back on the head of its queue
4602 */
4603 if (m->cleaning)
4604 hibernate_stats.hibernate_skipped_cleaning++;
4605 else
4606 hibernate_stats.hibernate_skipped_transient++;
4607
4608 goto reenter_pg_on_q;
4609 }
4610 if ( !m_object->pager_initialized && m_object->pager_created)
4611 goto reenter_pg_on_q;
4612
4613 if (m_object->copy == VM_OBJECT_NULL) {
4614 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4615 /*
4616 * let the normal hibernate image path
4617 * deal with these
4618 */
4619 goto reenter_pg_on_q;
4620 }
4621 }
4622 if ( !m->dirty && m->pmapped) {
4623 refmod_state = pmap_get_refmod(m->phys_page);
4624
4625 if ((refmod_state & VM_MEM_MODIFIED))
4626 m->dirty = TRUE;
4627 } else
4628 refmod_state = 0;
4629
4630 if ( !m->dirty) {
4631 /*
4632 * page is not to be cleaned
4633 * put it back on the head of its queue
4634 */
4635 if (m->precious)
4636 hibernate_stats.hibernate_skipped_precious++;
4637
4638 goto reenter_pg_on_q;
4639 }
4640 tq = NULL;
4641
4642 if (m_object->internal) {
4643 if (VM_PAGE_Q_THROTTLED(iq))
4644 tq = iq;
4645 } else if (VM_PAGE_Q_THROTTLED(eq))
4646 tq = eq;
4647
4648 if (tq != NULL) {
4649 wait_result_t wait_result;
4650 int wait_count = 5;
4651
4652 if (l_object != NULL) {
4653 vm_object_unlock(l_object);
4654 l_object = NULL;
4655 }
4656 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4657
4658 tq->pgo_throttled = TRUE;
4659
4660 while (retval == 0) {
4661
4662 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4663
4664 vm_page_unlock_queues();
4665
4666 wait_result = thread_block(THREAD_CONTINUE_NULL);
4667
4668 vm_page_lock_queues();
4669
4670 if (hibernate_should_abort())
4671 retval = 1;
4672
4673 if (wait_result != THREAD_TIMED_OUT)
4674 break;
4675
4676 if (--wait_count == 0) {
4677 hibernate_stats.hibernate_throttle_timeout++;
4678 retval = 1;
4679 }
4680 }
4681 if (retval)
4682 break;
4683
4684 hibernate_stats.hibernate_throttled++;
4685
4686 continue;
4687 }
4688 VM_PAGE_QUEUES_REMOVE(m);
4689
4690 hibernate_dirty_page(m);
4691
4692 hibernate_stats.hibernate_found_dirty++;
4693
4694 goto next_pg;
4695
4696 reenter_pg_on_q:
4697 queue_remove(q, m, vm_page_t, pageq);
4698 queue_enter(q, m, vm_page_t, pageq);
4699
4700 hibernate_stats.hibernate_reentered_on_q++;
4701 next_pg:
4702 hibernate_stats.hibernate_considered++;
4703
4704 qcount--;
4705 try_failed_count = 0;
4706 }
4707 if (l_object != NULL) {
4708 vm_object_unlock(l_object);
4709 l_object = NULL;
4710 }
4711 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4712
4713 vm_page_unlock_queues();
4714
4715 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4716
4717 return (retval);
4718 }
4719
4720
4721 static int
4722 hibernate_flush_dirty_pages()
4723 {
4724 struct vm_speculative_age_q *aq;
4725 uint32_t i;
4726
4727 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4728
4729 if (vm_page_local_q) {
4730 for (i = 0; i < vm_page_local_q_count; i++)
4731 vm_page_reactivate_local(i, TRUE, FALSE);
4732 }
4733
4734 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4735 int qcount;
4736 vm_page_t m;
4737
4738 aq = &vm_page_queue_speculative[i];
4739
4740 if (queue_empty(&aq->age_q))
4741 continue;
4742 qcount = 0;
4743
4744 vm_page_lockspin_queues();
4745
4746 queue_iterate(&aq->age_q,
4747 m,
4748 vm_page_t,
4749 pageq)
4750 {
4751 qcount++;
4752 }
4753 vm_page_unlock_queues();
4754
4755 if (qcount) {
4756 if (hibernate_flush_queue(&aq->age_q, qcount))
4757 return (1);
4758 }
4759 }
4760 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4761 return (1);
4762 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_zf_queue_count))
4763 return (1);
4764 if (hibernate_flush_queue(&vm_page_queue_zf, vm_zf_queue_count))
4765 return (1);
4766
4767 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4768 return (1);
4769 return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4770 }
4771
4772
4773 extern void IOSleep(unsigned int);
4774 extern int sync_internal(void);
4775
4776 int
4777 hibernate_flush_memory()
4778 {
4779 int retval;
4780
4781 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4782
4783 IOSleep(2 * 1000);
4784
4785 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4786
4787 if ((retval = hibernate_flush_dirty_pages()) == 0) {
4788 if (consider_buffer_cache_collect != NULL) {
4789
4790 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4791
4792 sync_internal();
4793 (void)(*consider_buffer_cache_collect)(1);
4794 consider_zone_gc(TRUE);
4795
4796 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4797 }
4798 }
4799 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4800
4801 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4802 hibernate_stats.hibernate_considered,
4803 hibernate_stats.hibernate_reentered_on_q,
4804 hibernate_stats.hibernate_found_dirty);
4805 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4806 hibernate_stats.hibernate_skipped_cleaning,
4807 hibernate_stats.hibernate_skipped_transient,
4808 hibernate_stats.hibernate_skipped_precious,
4809 hibernate_stats.hibernate_queue_nolock);
4810 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4811 hibernate_stats.hibernate_queue_paused,
4812 hibernate_stats.hibernate_throttled,
4813 hibernate_stats.hibernate_throttle_timeout,
4814 hibernate_stats.hibernate_drained,
4815 hibernate_stats.hibernate_drain_timeout);
4816
4817 return (retval);
4818 }
4819
4820
4821 static void
4822 hibernate_page_list_zero(hibernate_page_list_t *list)
4823 {
4824 uint32_t bank;
4825 hibernate_bitmap_t * bitmap;
4826
4827 bitmap = &list->bank_bitmap[0];
4828 for (bank = 0; bank < list->bank_count; bank++)
4829 {
4830 uint32_t last_bit;
4831
4832 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4833 // set out-of-bound bits at end of bitmap.
4834 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4835 if (last_bit)
4836 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
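// e.g. (hypothetical bank): with 45 pages in a bank, last_bit is
// 45 & 31 == 13 and the final word becomes 0xFFFFFFFF >> 13, so its
// 19 unused bit positions are pre-set and never treated as pages
// that need saving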
4837
4838 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4839 }
4840 }
4841
4842 void
4843 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4844 {
4845 uint32_t i;
4846 vm_page_t m;
4847 uint64_t start, end, timeout, nsec;
4848 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4849 clock_get_uptime(&start);
4850
4851 for (i = 0; i < gobble_count; i++)
4852 {
4853 while (VM_PAGE_NULL == (m = vm_page_grab()))
4854 {
4855 clock_get_uptime(&end);
4856 if (end >= timeout)
4857 break;
4858 VM_PAGE_WAIT();
4859 }
4860 if (!m)
4861 break;
4862 m->busy = FALSE;
4863 vm_page_gobble(m);
4864
4865 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4866 hibernate_gobble_queue = m;
4867 }
4868
4869 clock_get_uptime(&end);
4870 absolutetime_to_nanoseconds(end - start, &nsec);
4871 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4872 }
4873
4874 void
4875 hibernate_free_gobble_pages(void)
4876 {
4877 vm_page_t m, next;
4878 uint32_t count = 0;
4879
4880 m = (vm_page_t) hibernate_gobble_queue;
4881 while(m)
4882 {
4883 next = (vm_page_t) m->pageq.next;
4884 vm_page_free(m);
4885 count++;
4886 m = next;
4887 }
4888 hibernate_gobble_queue = VM_PAGE_NULL;
4889
4890 if (count)
4891 HIBLOG("Freed %d pages\n", count);
4892 }
4893
4894 static boolean_t
4895 hibernate_consider_discard(vm_page_t m)
4896 {
4897 vm_object_t object = NULL;
4898 int refmod_state;
4899 boolean_t discard = FALSE;
4900
4901 do
4902 {
4903 if (m->private)
4904 panic("hibernate_consider_discard: private");
4905
4906 if (!vm_object_lock_try(m->object)) {
4907 hibernate_stats.cd_lock_failed++;
4908 break;
4909 }
4910 object = m->object;
4911
4912 if (VM_PAGE_WIRED(m)) {
4913 hibernate_stats.cd_found_wired++;
4914 break;
4915 }
4916 if (m->precious) {
4917 hibernate_stats.cd_found_precious++;
4918 break;
4919 }
4920 if (m->busy || !object->alive) {
4921 /*
4922 * Somebody is playing with this page.
4923 */
4924 hibernate_stats.cd_found_busy++;
4925 break;
4926 }
4927 if (m->absent || m->unusual || m->error) {
4928 /*
4929 * If it's unusual in any way, ignore it
4930 */
4931 hibernate_stats.cd_found_unusual++;
4932 break;
4933 }
4934 if (m->cleaning) {
4935 hibernate_stats.cd_found_cleaning++;
4936 break;
4937 }
4938 if (m->laundry || m->list_req_pending) {
4939 hibernate_stats.cd_found_laundry++;
4940 break;
4941 }
4942 if (!m->dirty)
4943 {
4944 refmod_state = pmap_get_refmod(m->phys_page);
4945
4946 if (refmod_state & VM_MEM_REFERENCED)
4947 m->reference = TRUE;
4948 if (refmod_state & VM_MEM_MODIFIED)
4949 m->dirty = TRUE;
4950 }
4951
4952 /*
4953 * If it's clean or purgeable we can discard the page on wakeup.
4954 */
4955 discard = (!m->dirty)
4956 || (VM_PURGABLE_VOLATILE == object->purgable)
4957 || (VM_PURGABLE_EMPTY == object->purgable);
4958
4959 if (discard == FALSE)
4960 hibernate_stats.cd_found_dirty++;
4961 }
4962 while (FALSE);
4963
4964 if (object)
4965 vm_object_unlock(object);
4966
4967 return (discard);
4968 }
4969
4970
4971 static void
4972 hibernate_discard_page(vm_page_t m)
4973 {
4974 if (m->absent || m->unusual || m->error)
4975 /*
4976 * If it's unusual in any way, ignore
4977 */
4978 return;
4979
4980 if (m->pmapped == TRUE)
4981 {
4982 __unused int refmod_state = pmap_disconnect(m->phys_page);
4983 }
4984
4985 if (m->laundry)
4986 panic("hibernate_discard_page(%p) laundry", m);
4987 if (m->private)
4988 panic("hibernate_discard_page(%p) private", m);
4989 if (m->fictitious)
4990 panic("hibernate_discard_page(%p) fictitious", m);
4991
4992 if (VM_PURGABLE_VOLATILE == m->object->purgable)
4993 {
4994 /* object should be on a queue */
4995 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
4996 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
4997 assert(old_queue);
4998 /* No need to lock page queue for token delete, hibernate_vm_unlock()
4999 makes sure these locks are uncontended before sleep */
5000 vm_purgeable_token_delete_first(old_queue);
5001 m->object->purgable = VM_PURGABLE_EMPTY;
5002 }
5003
5004 vm_page_free(m);
5005 }
5006
5007 /*
5008 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
5009 pages known to VM to not need saving are subtracted.
5010 Wired pages to be saved are present in page_list_wired, pageable in page_list.
5011 */
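
/*
 * illustration only: hibernate_page_bitset(list, TRUE, pn) marks physical
 * page 'pn' as one that does not need saving in that bitmap, while a page
 * whose bit stays zero will be written to the image... the queue walks in
 * hibernate_page_list_setall() below apply that rule to the free queues
 * and to any page hibernate_consider_discard() says can be discarded
 */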
5012
5013 void
5014 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5015 hibernate_page_list_t * page_list_wired,
5016 hibernate_page_list_t * page_list_pal,
5017 uint32_t * pagesOut)
5018 {
5019 uint64_t start, end, nsec;
5020 vm_page_t m;
5021 uint32_t pages = page_list->page_count;
5022 uint32_t count_zf = 0, count_throttled = 0;
5023 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
5024 uint32_t count_wire = pages;
5025 uint32_t count_discard_active = 0;
5026 uint32_t count_discard_inactive = 0;
5027 uint32_t count_discard_purgeable = 0;
5028 uint32_t count_discard_speculative = 0;
5029 uint32_t i;
5030 uint32_t bank;
5031 hibernate_bitmap_t * bitmap;
5032 hibernate_bitmap_t * bitmap_wired;
5033
5034
5035 HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
5036
5037 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5038
5039 clock_get_uptime(&start);
5040
5041 hibernate_page_list_zero(page_list);
5042 hibernate_page_list_zero(page_list_wired);
5043 hibernate_page_list_zero(page_list_pal);
5044
5045 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5046 hibernate_stats.cd_pages = pages;
5047
5048 if (vm_page_local_q) {
5049 for (i = 0; i < vm_page_local_q_count; i++)
5050 vm_page_reactivate_local(i, TRUE, TRUE);
5051 }
5052
5053 m = (vm_page_t) hibernate_gobble_queue;
5054 while(m)
5055 {
5056 pages--;
5057 count_wire--;
5058 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5059 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5060 m = (vm_page_t) m->pageq.next;
5061 }
5062
5063 for( i = 0; i < real_ncpus; i++ )
5064 {
5065 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5066 {
5067 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5068 {
5069 pages--;
5070 count_wire--;
5071 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5072 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5073
5074 hibernate_stats.cd_local_free++;
5075 hibernate_stats.cd_total_free++;
5076 }
5077 }
5078 }
5079
5080 for( i = 0; i < vm_colors; i++ )
5081 {
5082 queue_iterate(&vm_page_queue_free[i],
5083 m,
5084 vm_page_t,
5085 pageq)
5086 {
5087 pages--;
5088 count_wire--;
5089 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5090 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5091
5092 hibernate_stats.cd_total_free++;
5093 }
5094 }
5095
5096 queue_iterate(&vm_lopage_queue_free,
5097 m,
5098 vm_page_t,
5099 pageq)
5100 {
5101 pages--;
5102 count_wire--;
5103 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5104 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5105
5106 hibernate_stats.cd_total_free++;
5107 }
5108
5109 queue_iterate( &vm_page_queue_throttled,
5110 m,
5111 vm_page_t,
5112 pageq )
5113 {
5114 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5115 && hibernate_consider_discard(m))
5116 {
5117 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5118 count_discard_inactive++;
5119 }
5120 else
5121 count_throttled++;
5122 count_wire--;
5123 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5124 }
5125
5126 queue_iterate( &vm_page_queue_zf,
5127 m,
5128 vm_page_t,
5129 pageq )
5130 {
5131 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5132 && hibernate_consider_discard(m))
5133 {
5134 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5135 if (m->dirty)
5136 count_discard_purgeable++;
5137 else
5138 count_discard_inactive++;
5139 }
5140 else
5141 count_zf++;
5142 count_wire--;
5143 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5144 }
5145
5146 queue_iterate( &vm_page_queue_inactive,
5147 m,
5148 vm_page_t,
5149 pageq )
5150 {
5151 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5152 && hibernate_consider_discard(m))
5153 {
5154 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5155 if (m->dirty)
5156 count_discard_purgeable++;
5157 else
5158 count_discard_inactive++;
5159 }
5160 else
5161 count_inactive++;
5162 count_wire--;
5163 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5164 }
5165
5166 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5167 {
5168 queue_iterate(&vm_page_queue_speculative[i].age_q,
5169 m,
5170 vm_page_t,
5171 pageq)
5172 {
5173 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5174 && hibernate_consider_discard(m))
5175 {
5176 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5177 count_discard_speculative++;
5178 }
5179 else
5180 count_speculative++;
5181 count_wire--;
5182 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5183 }
5184 }
5185
5186 queue_iterate( &vm_page_queue_active,
5187 m,
5188 vm_page_t,
5189 pageq )
5190 {
5191 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5192 && hibernate_consider_discard(m))
5193 {
5194 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5195 if (m->dirty)
5196 count_discard_purgeable++;
5197 else
5198 count_discard_active++;
5199 }
5200 else
5201 count_active++;
5202 count_wire--;
5203 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5204 }
5205
5206 // pull wired from hibernate_bitmap
5207
5208 bitmap = &page_list->bank_bitmap[0];
5209 bitmap_wired = &page_list_wired->bank_bitmap[0];
5210 for (bank = 0; bank < page_list->bank_count; bank++)
5211 {
5212 for (i = 0; i < bitmap->bitmapwords; i++)
5213 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5214 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5215 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5216 }
5217
5218 // machine dependent adjustments
5219 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
5220
5221 hibernate_stats.cd_count_wire = count_wire;
5222 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative;
5223
5224 clock_get_uptime(&end);
5225 absolutetime_to_nanoseconds(end - start, &nsec);
5226 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5227
5228 HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
5229 pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
5230 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5231
5232 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
5233
5234 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5235 }
5236
5237 void
5238 hibernate_page_list_discard(hibernate_page_list_t * page_list)
5239 {
5240 uint64_t start, end, nsec;
5241 vm_page_t m;
5242 vm_page_t next;
5243 uint32_t i;
5244 uint32_t count_discard_active = 0;
5245 uint32_t count_discard_inactive = 0;
5246 uint32_t count_discard_purgeable = 0;
5247 uint32_t count_discard_speculative = 0;
5248
5249 clock_get_uptime(&start);
5250
5251 m = (vm_page_t) queue_first(&vm_page_queue_zf);
5252 while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
5253 {
5254 next = (vm_page_t) m->pageq.next;
5255 if (hibernate_page_bittst(page_list, m->phys_page))
5256 {
5257 if (m->dirty)
5258 count_discard_purgeable++;
5259 else
5260 count_discard_inactive++;
5261 hibernate_discard_page(m);
5262 }
5263 m = next;
5264 }
5265
5266 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5267 {
5268 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5269 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5270 {
5271 next = (vm_page_t) m->pageq.next;
5272 if (hibernate_page_bittst(page_list, m->phys_page))
5273 {
5274 count_discard_speculative++;
5275 hibernate_discard_page(m);
5276 }
5277 m = next;
5278 }
5279 }
5280
5281 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5282 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5283 {
5284 next = (vm_page_t) m->pageq.next;
5285 if (hibernate_page_bittst(page_list, m->phys_page))
5286 {
5287 if (m->dirty)
5288 count_discard_purgeable++;
5289 else
5290 count_discard_inactive++;
5291 hibernate_discard_page(m);
5292 }
5293 m = next;
5294 }
5295
5296 m = (vm_page_t) queue_first(&vm_page_queue_active);
5297 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5298 {
5299 next = (vm_page_t) m->pageq.next;
5300 if (hibernate_page_bittst(page_list, m->phys_page))
5301 {
5302 if (m->dirty)
5303 count_discard_purgeable++;
5304 else
5305 count_discard_active++;
5306 hibernate_discard_page(m);
5307 }
5308 m = next;
5309 }
5310
5311 clock_get_uptime(&end);
5312 absolutetime_to_nanoseconds(end - start, &nsec);
5313 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
5314 nsec / 1000000ULL,
5315 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5316 }
5317
5318 #endif /* HIBERNATION */
5319
5320 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5321
5322 #include <mach_vm_debug.h>
5323 #if MACH_VM_DEBUG
5324
5325 #include <mach_debug/hash_info.h>
5326 #include <vm/vm_debug.h>
5327
5328 /*
5329 * Routine: vm_page_info
5330 * Purpose:
5331 * Return information about the global VP table.
5332 * Fills the buffer with as much information as possible
5333 * and returns the desired size of the buffer.
5334 * Conditions:
5335 * Nothing locked. The caller should provide
5336 * possibly-pageable memory.
5337 */
5338
5339 unsigned int
5340 vm_page_info(
5341 hash_info_bucket_t *info,
5342 unsigned int count)
5343 {
5344 unsigned int i;
5345 lck_spin_t *bucket_lock;
5346
5347 if (vm_page_bucket_count < count)
5348 count = vm_page_bucket_count;
5349
5350 for (i = 0; i < count; i++) {
5351 vm_page_bucket_t *bucket = &vm_page_buckets[i];
5352 unsigned int bucket_count = 0;
5353 vm_page_t m;
5354
5355 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5356 lck_spin_lock(bucket_lock);
5357
5358 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5359 bucket_count++;
5360
5361 lck_spin_unlock(bucket_lock);
5362
5363 /* don't touch pageable memory while holding locks */
5364 info[i].hib_count = bucket_count;
5365 }
5366
5367 return vm_page_bucket_count;
5368 }
5369 #endif /* MACH_VM_DEBUG */
5370
5371 #include <mach_kdb.h>
5372 #if MACH_KDB
5373
5374 #include <ddb/db_output.h>
5375 #include <vm/vm_print.h>
5376 #define printf kdbprintf
5377
5378 /*
5379 * Routine: vm_page_print [exported]
5380 */
5381 void
5382 vm_page_print(
5383 db_addr_t db_addr)
5384 {
5385 vm_page_t p;
5386
5387 p = (vm_page_t) (long) db_addr;
5388
5389 iprintf("page 0x%x\n", p);
5390
5391 db_indent += 2;
5392
5393 iprintf("object=0x%x", p->object);
5394 printf(", offset=0x%x", p->offset);
5395 printf(", wire_count=%d", p->wire_count);
5396
5397 iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
5398 (p->local ? "" : "!"),
5399 (p->inactive ? "" : "!"),
5400 (p->active ? "" : "!"),
5401 (p->throttled ? "" : "!"),
5402 (p->gobbled ? "" : "!"),
5403 (p->laundry ? "" : "!"),
5404 (p->free ? "" : "!"),
5405 (p->reference ? "" : "!"),
5406 (p->encrypted ? "" : "!"));
5407 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
5408 (p->busy ? "" : "!"),
5409 (p->wanted ? "" : "!"),
5410 (p->tabled ? "" : "!"),
5411 (p->fictitious ? "" : "!"),
5412 (p->private ? "" : "!"),
5413 (p->precious ? "" : "!"));
5414 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
5415 (p->absent ? "" : "!"),
5416 (p->error ? "" : "!"),
5417 (p->dirty ? "" : "!"),
5418 (p->cleaning ? "" : "!"),
5419 (p->pageout ? "" : "!"),
5420 (p->clustered ? "" : "!"));
5421 iprintf("%soverwriting, %srestart, %sunusual\n",
5422 (p->overwriting ? "" : "!"),
5423 (p->restart ? "" : "!"),
5424 (p->unusual ? "" : "!"));
5425
5426 iprintf("phys_page=0x%x", p->phys_page);
5427
5428 db_indent -= 2;
5429 }
5430 #endif /* MACH_KDB */