apple/xnu.git: osfmk/vm/vm_resident.c (commit 979c816246cc3fd1306b1b5775bca773137de74c)
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <ppc/mappings.h> /* (BRINGUP) */
89 #include <pexpert/pexpert.h> /* (BRINGUP) */
90
91 #include <vm/vm_protos.h>
92 #include <vm/memory_object.h>
93 #include <vm/vm_purgeable_internal.h>
94
95 #include <IOKit/IOHibernatePrivate.h>
96
97
98 #if CONFIG_EMBEDDED
99 #include <sys/kern_memorystatus.h>
100 #endif
101
102 #include <sys/kdebug.h>
103
104 boolean_t vm_page_free_verify = TRUE;
105
106 unsigned int vm_lopage_free_count = 0;
107 unsigned int vm_lopage_free_limit = 0;
108 unsigned int vm_lopage_lowater = 0;
109 boolean_t vm_lopage_refill = FALSE;
110 boolean_t vm_lopage_needed = FALSE;
111
112 lck_mtx_ext_t vm_page_queue_lock_ext;
113 lck_mtx_ext_t vm_page_queue_free_lock_ext;
114 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
115
116 int speculative_age_index = 0;
117 int speculative_steal_index = 0;
118 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
119
120
121 __private_extern__ void vm_page_init_lck_grp(void);
122
123 static void vm_page_free_prepare(vm_page_t page);
124
125
126
127 /*
128 * Associated with each page of user-allocatable memory is a
129 * page structure.
130 */
131
132 /*
133 * These variables record the values returned by vm_page_bootstrap,
134 * for debugging purposes. The implementation of pmap_steal_memory
135 * and pmap_startup here also uses them internally.
136 */
137
138 vm_offset_t virtual_space_start;
139 vm_offset_t virtual_space_end;
140 int vm_page_pages;
141
142 /*
143 * The vm_page_lookup() routine, which provides for fast
144 * (virtual memory object, offset) to page lookup, employs
145 * the following hash table. The vm_page_{insert,remove}
146 * routines install and remove associations in the table.
147 * [This table is often called the virtual-to-physical,
148 * or VP, table.]
149 */
150 typedef struct {
151 vm_page_t pages;
152 #if MACH_PAGE_HASH_STATS
153 int cur_count; /* current count */
154 int hi_count; /* high water mark */
155 #endif /* MACH_PAGE_HASH_STATS */
156 } vm_page_bucket_t;
157
158
159 #define BUCKETS_PER_LOCK 16
160
161 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
162 unsigned int vm_page_bucket_count = 0; /* How big is array? */
163 unsigned int vm_page_hash_mask; /* Mask for hash function */
164 unsigned int vm_page_hash_shift; /* Shift for hash function */
165 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
166 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
167
168 lck_spin_t *vm_page_bucket_locks;
169
170
171 #if MACH_PAGE_HASH_STATS
172 /* This routine is only for debug. It is intended to be called by
173 * hand by a developer using a kernel debugger. This routine prints
174 * out vm_page_hash table statistics to the kernel debug console.
175 */
176 void
177 hash_debug(void)
178 {
179 int i;
180 int numbuckets = 0;
181 int highsum = 0;
182 int maxdepth = 0;
183
184 for (i = 0; i < vm_page_bucket_count; i++) {
185 if (vm_page_buckets[i].hi_count) {
186 numbuckets++;
187 highsum += vm_page_buckets[i].hi_count;
188 if (vm_page_buckets[i].hi_count > maxdepth)
189 maxdepth = vm_page_buckets[i].hi_count;
190 }
191 }
192 printf("Total number of buckets: %d\n", vm_page_bucket_count);
193 printf("Number used buckets: %d = %d%%\n",
194 numbuckets, 100*numbuckets/vm_page_bucket_count);
195 printf("Number unused buckets: %d = %d%%\n",
196 vm_page_bucket_count - numbuckets,
197 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
198 printf("Sum of bucket max depth: %d\n", highsum);
199 printf("Average bucket depth: %d.%2d\n",
200 highsum/vm_page_bucket_count,
201 highsum%vm_page_bucket_count);
202 printf("Maximum bucket depth: %d\n", maxdepth);
203 }
204 #endif /* MACH_PAGE_HASH_STATS */
205
206 /*
207 * The virtual page size is currently implemented as a runtime
208 * variable, but is constant once initialized using vm_set_page_size.
209 * This initialization must be done in the machine-dependent
210 * bootstrap sequence, before calling other machine-independent
211 * initializations.
212 *
213 * All references to the virtual page size outside this
214 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
215 * constants.
216 */
217 vm_size_t page_size = PAGE_SIZE;
218 vm_size_t page_mask = PAGE_MASK;
219 int page_shift = PAGE_SHIFT;
220
221 /*
222 * Resident page structures are initialized from
223 * a template (see vm_page_alloc).
224 *
225 * When adding a new field to the virtual memory
226 * object structure, be sure to add initialization
227 * (see vm_page_bootstrap).
228 */
229 struct vm_page vm_page_template;
230
231 vm_page_t vm_pages = VM_PAGE_NULL;
232 unsigned int vm_pages_count = 0;
233 ppnum_t vm_page_lowest = 0;
234
235 /*
236 * Resident pages that represent real memory
237 * are allocated from a set of free lists,
238 * one per color.
239 */
240 unsigned int vm_colors;
241 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
242 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
243 queue_head_t vm_page_queue_free[MAX_COLORS];
244 vm_page_t vm_page_queue_fictitious;
245 unsigned int vm_page_free_wanted;
246 unsigned int vm_page_free_wanted_privileged;
247 unsigned int vm_page_free_count;
248 unsigned int vm_page_fictitious_count;
249
250 unsigned int vm_page_free_count_minimum; /* debugging */
251
252 /*
253 * Occasionally, the virtual memory system uses
254 * resident page structures that do not refer to
255 * real pages, for example to leave a page with
256 * important state information in the VP table.
257 *
258 * These page structures are allocated the way
259 * most other kernel structures are.
260 */
261 zone_t vm_page_zone;
262 vm_locks_array_t vm_page_locks;
263 decl_lck_mtx_data(,vm_page_alloc_lock)
264 unsigned int io_throttle_zero_fill;
265
266 unsigned int vm_page_local_q_count = 0;
267 unsigned int vm_page_local_q_soft_limit = 250;
268 unsigned int vm_page_local_q_hard_limit = 500;
269 struct vplq *vm_page_local_q = NULL;
270
271 /*
272 * Fictitious pages don't have a physical address,
273 * but we must initialize phys_page to something.
274 * For debugging, this should be a strange value
275 * that the pmap module can recognize in assertions.
276 */
277 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
278
279 /*
280 * Guard pages are not accessible so they don't
281 * need a physical address, but we need to enter
282 * one in the pmap.
283 * Let's make it recognizable and make sure that
284 * we don't use a real physical page with that
285 * physical address.
286 */
287 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
288
289 /*
290 * Resident page structures are also chained on
291 * queues that are used by the page replacement
292 * system (pageout daemon). These queues are
293 * defined here, but are shared by the pageout
294 * module. The inactive queue is broken into
295 * inactive and zf for convenience as the
296 * pageout daemon often assigns a higher
297 * affinity to zf pages
298 */
299 queue_head_t vm_page_queue_active;
300 queue_head_t vm_page_queue_inactive;
301 queue_head_t vm_page_queue_zf; /* inactive memory queue for zero fill */
302 queue_head_t vm_page_queue_throttled;
303
304 unsigned int vm_page_active_count;
305 unsigned int vm_page_inactive_count;
306 unsigned int vm_page_throttled_count;
307 unsigned int vm_page_speculative_count;
308 unsigned int vm_page_wire_count;
309 unsigned int vm_page_wire_count_initial;
310 unsigned int vm_page_gobble_count = 0;
311 unsigned int vm_page_wire_count_warning = 0;
312 unsigned int vm_page_gobble_count_warning = 0;
313
314 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
315 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
316 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
317
318 #if DEVELOPMENT || DEBUG
319 unsigned int vm_page_speculative_recreated = 0;
320 unsigned int vm_page_speculative_created = 0;
321 unsigned int vm_page_speculative_used = 0;
322 #endif
323
324 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
325 ppnum_t max_valid_low_ppnum = 0xffffffff;
326
327
328 /*
329 * Several page replacement parameters are also
330 * shared with this module, so that page allocation
331 * (done here in vm_page_alloc) can trigger the
332 * pageout daemon.
333 */
334 unsigned int vm_page_free_target = 0;
335 unsigned int vm_page_free_min = 0;
336 unsigned int vm_page_throttle_limit = 0;
337 uint32_t vm_page_creation_throttle = 0;
338 unsigned int vm_page_inactive_target = 0;
339 unsigned int vm_page_inactive_min = 0;
340 unsigned int vm_page_free_reserved = 0;
341 unsigned int vm_page_throttle_count = 0;
342
343 /*
344 * The VM system has a couple of heuristics for deciding
345 * that pages are "uninteresting" and should be placed
346 * on the inactive queue as likely candidates for replacement.
347 * These variables let the heuristics be controlled at run-time
348 * to make experimentation easier.
349 */
350
351 boolean_t vm_page_deactivate_hint = TRUE;
352
353 struct vm_page_stats_reusable vm_page_stats_reusable;
354
355 /*
356 * vm_set_page_size:
357 *
358 * Sets the page size, perhaps based upon the memory
359 * size. Must be called before any use of page-size
360 * dependent functions.
361 *
362 * Sets page_shift and page_mask from page_size.
363 */
364 void
365 vm_set_page_size(void)
366 {
367 page_mask = page_size - 1;
368
369 if ((page_mask & page_size) != 0)
370 panic("vm_set_page_size: page size not a power of two");
371
372 for (page_shift = 0; ; page_shift++)
373 if ((1U << page_shift) == page_size)
374 break;
375 }
376
377
378 /* Called once during startup, once the cache geometry is known.
379 */
380 static void
381 vm_page_set_colors( void )
382 {
383 unsigned int n, override;
384
385 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
386 n = override;
387 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
388 n = vm_cache_geometry_colors;
389 else n = DEFAULT_COLORS; /* use default if all else fails */
390
391 if ( n == 0 )
392 n = 1;
393 if ( n > MAX_COLORS )
394 n = MAX_COLORS;
395
396 /* the count must be a power of 2 */
397 if ( ( n & (n - 1)) != 0 )
398 panic("vm_page_set_colors");
399
400 vm_colors = n;
401 vm_color_mask = n - 1;
402 }
403
404
405 lck_grp_t vm_page_lck_grp_free;
406 lck_grp_t vm_page_lck_grp_queue;
407 lck_grp_t vm_page_lck_grp_local;
408 lck_grp_t vm_page_lck_grp_purge;
409 lck_grp_t vm_page_lck_grp_alloc;
410 lck_grp_t vm_page_lck_grp_bucket;
411 lck_grp_attr_t vm_page_lck_grp_attr;
412 lck_attr_t vm_page_lck_attr;
413
414
415 __private_extern__ void
416 vm_page_init_lck_grp(void)
417 {
418 /*
419 * initialize the vm_page lock world
420 */
421 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
422 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
423 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
424 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
425 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
426 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
427 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
428 lck_attr_setdefault(&vm_page_lck_attr);
429 }
430
431 void
432 vm_page_init_local_q()
433 {
434 unsigned int num_cpus;
435 unsigned int i;
436 struct vplq *t_local_q;
437
438 num_cpus = ml_get_max_cpus();
439
440 /*
441 * no point in this for a uni-processor system
442 */
443 if (num_cpus >= 2) {
444 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
445
446 for (i = 0; i < num_cpus; i++) {
447 struct vpl *lq;
448
449 lq = &t_local_q[i].vpl_un.vpl;
450 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
451 queue_init(&lq->vpl_queue);
452 lq->vpl_count = 0;
453 }
454 vm_page_local_q_count = num_cpus;
455
456 vm_page_local_q = (struct vplq *)t_local_q;
457 }
458 }
459
460
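/*
 * Snapshots of the initial memory accounting, recorded by
 * vm_page_bootstrap() below (see the assignments near the end
 * of that routine) so the boot-time values remain inspectable.
 */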
461 uint64_t initial_max_mem;
462 int initial_wire_count;
463 int initial_free_count;
464 int initial_lopage_count;
465
466 /*
467 * vm_page_bootstrap:
468 *
469 * Initializes the resident memory module.
470 *
471 * Allocates memory for the page cells, and
472 * for the object/offset-to-page hash table headers.
473 * Each page cell is initialized and placed on the free list.
474 * Returns the range of available kernel virtual memory.
475 */
476
477 void
478 vm_page_bootstrap(
479 vm_offset_t *startp,
480 vm_offset_t *endp)
481 {
482 register vm_page_t m;
483 unsigned int i;
484 unsigned int log1;
485 unsigned int log2;
486 unsigned int size;
487
488 /*
489 * Initialize the vm_page template.
490 */
491
492 m = &vm_page_template;
493 bzero(m, sizeof (*m));
494
495 m->pageq.next = NULL;
496 m->pageq.prev = NULL;
497 m->listq.next = NULL;
498 m->listq.prev = NULL;
499 m->next = VM_PAGE_NULL;
500
501 m->object = VM_OBJECT_NULL; /* reset later */
502 m->offset = (vm_object_offset_t) -1; /* reset later */
503
504 m->wire_count = 0;
505 m->local = FALSE;
506 m->inactive = FALSE;
507 m->active = FALSE;
508 m->pageout_queue = FALSE;
509 m->speculative = FALSE;
510 m->laundry = FALSE;
511 m->free = FALSE;
512 m->reference = FALSE;
513 m->gobbled = FALSE;
514 m->private = FALSE;
515 m->throttled = FALSE;
516 m->__unused_pageq_bits = 0;
517
518 m->phys_page = 0; /* reset later */
519
520 m->busy = TRUE;
521 m->wanted = FALSE;
522 m->tabled = FALSE;
523 m->fictitious = FALSE;
524 m->pmapped = FALSE;
525 m->wpmapped = FALSE;
526 m->pageout = FALSE;
527 m->absent = FALSE;
528 m->error = FALSE;
529 m->dirty = FALSE;
530 m->cleaning = FALSE;
531 m->precious = FALSE;
532 m->clustered = FALSE;
533 m->overwriting = FALSE;
534 m->restart = FALSE;
535 m->unusual = FALSE;
536 m->encrypted = FALSE;
537 m->encrypted_cleaning = FALSE;
538 m->list_req_pending = FALSE;
539 m->dump_cleaning = FALSE;
540 m->cs_validated = FALSE;
541 m->cs_tainted = FALSE;
542 m->no_cache = FALSE;
543 m->zero_fill = FALSE;
544 m->reusable = FALSE;
545 m->__unused_object_bits = 0;
546
547
548 /*
549 * Initialize the page queues.
550 */
551 vm_page_init_lck_grp();
552
553 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
554 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
555 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
556
557 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
558 int group;
559
560 purgeable_queues[i].token_q_head = 0;
561 purgeable_queues[i].token_q_tail = 0;
562 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
563 queue_init(&purgeable_queues[i].objq[group]);
564
565 purgeable_queues[i].type = i;
566 purgeable_queues[i].new_pages = 0;
567 #if MACH_ASSERT
568 purgeable_queues[i].debug_count_tokens = 0;
569 purgeable_queues[i].debug_count_objects = 0;
570 #endif
571 };
572
573 for (i = 0; i < MAX_COLORS; i++ )
574 queue_init(&vm_page_queue_free[i]);
575 queue_init(&vm_lopage_queue_free);
576 vm_page_queue_fictitious = VM_PAGE_NULL;
577 queue_init(&vm_page_queue_active);
578 queue_init(&vm_page_queue_inactive);
579 queue_init(&vm_page_queue_throttled);
580 queue_init(&vm_page_queue_zf);
581
582 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
583 queue_init(&vm_page_queue_speculative[i].age_q);
584
585 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
586 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
587 }
588 vm_page_free_wanted = 0;
589 vm_page_free_wanted_privileged = 0;
590
591 vm_page_set_colors();
592
593
594 /*
595 * Steal memory for the map and zone subsystems.
596 */
597
598 vm_map_steal_memory();
599 zone_steal_memory();
600
601 /*
602 * Allocate (and initialize) the virtual-to-physical
603 * table hash buckets.
604 *
605 * The number of buckets should be a power of two to
606 * get a good hash function. The following computation
607 * chooses the first power of two that is greater
608 * than the number of physical pages in the system.
609 */
610
611 if (vm_page_bucket_count == 0) {
612 unsigned int npages = pmap_free_pages();
613
614 vm_page_bucket_count = 1;
615 while (vm_page_bucket_count < npages)
616 vm_page_bucket_count <<= 1;
617 }
618 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
619
620 vm_page_hash_mask = vm_page_bucket_count - 1;
621
622 /*
623 * Calculate object shift value for hashing algorithm:
624 * O = log2(sizeof(struct vm_object))
625 * B = log2(vm_page_bucket_count)
626 * hash shifts the object left by
627 * B/2 - O
628 */
629 size = vm_page_bucket_count;
630 for (log1 = 0; size > 1; log1++)
631 size /= 2;
632 size = sizeof(struct vm_object);
633 for (log2 = 0; size > 1; log2++)
634 size /= 2;
635 vm_page_hash_shift = log1/2 - log2 + 1;
636
637 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
638 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
639 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to insure unique series */
640
641 if (vm_page_hash_mask & vm_page_bucket_count)
642 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
643
644 vm_page_buckets = (vm_page_bucket_t *)
645 pmap_steal_memory(vm_page_bucket_count *
646 sizeof(vm_page_bucket_t));
647
648 vm_page_bucket_locks = (lck_spin_t *)
649 pmap_steal_memory(vm_page_bucket_lock_count *
650 sizeof(lck_spin_t));
651
652 for (i = 0; i < vm_page_bucket_count; i++) {
653 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
654
655 bucket->pages = VM_PAGE_NULL;
656 #if MACH_PAGE_HASH_STATS
657 bucket->cur_count = 0;
658 bucket->hi_count = 0;
659 #endif /* MACH_PAGE_HASH_STATS */
660 }
661
662 for (i = 0; i < vm_page_bucket_lock_count; i++)
663 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
664
665 /*
666 * Machine-dependent code allocates the resident page table.
667 * It uses vm_page_init to initialize the page frames.
668 * The code also returns to us the virtual space available
669 * to the kernel. We don't trust the pmap module
670 * to get the alignment right.
671 */
672
673 pmap_startup(&virtual_space_start, &virtual_space_end);
674 virtual_space_start = round_page(virtual_space_start);
675 virtual_space_end = trunc_page(virtual_space_end);
676
677 *startp = virtual_space_start;
678 *endp = virtual_space_end;
679
680 /*
681 * Compute the initial "wire" count.
682 * Up until now, the pages which have been set aside are not under
683 * the VM system's control, so although they aren't explicitly
684 * wired, they nonetheless can't be moved. At this moment,
685 * all VM managed pages are "free", courtesy of pmap_startup.
686 */
687 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
688 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
689 vm_page_wire_count_initial = vm_page_wire_count;
690 vm_page_free_count_minimum = vm_page_free_count;
691
692 initial_max_mem = max_mem;
693 initial_wire_count = vm_page_wire_count;
694 initial_free_count = vm_page_free_count;
695 initial_lopage_count = vm_lopage_free_count;
696
697 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
698 vm_page_free_count, vm_page_wire_count);
699
700 simple_lock_init(&vm_paging_lock, 0);
701 }
702
703 #ifndef MACHINE_PAGES
704 /*
705 * We implement pmap_steal_memory and pmap_startup with the help
706 * of two simpler functions, pmap_virtual_space and pmap_next_page.
707 */
708
709 void *
710 pmap_steal_memory(
711 vm_size_t size)
712 {
713 vm_offset_t addr, vaddr;
714 ppnum_t phys_page;
715
716 /*
717 * We round the size up to a multiple of sizeof(void *).
718 */
719
720 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
721
722 /*
723 * If this is the first call to pmap_steal_memory,
724 * we have to initialize ourself.
725 */
726
727 if (virtual_space_start == virtual_space_end) {
728 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
729
730 /*
731 * The initial values must be aligned properly, and
732 * we don't trust the pmap module to do it right.
733 */
734
735 virtual_space_start = round_page(virtual_space_start);
736 virtual_space_end = trunc_page(virtual_space_end);
737 }
738
739 /*
740 * Allocate virtual memory for this request.
741 */
742
743 addr = virtual_space_start;
744 virtual_space_start += size;
745
746 kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
747
748 /*
749 * Allocate and map physical pages to back new virtual pages.
750 */
751
752 for (vaddr = round_page(addr);
753 vaddr < addr + size;
754 vaddr += PAGE_SIZE) {
755
756 if (!pmap_next_page_hi(&phys_page))
757 panic("pmap_steal_memory");
758
759 /*
760 * XXX Logically, these mappings should be wired,
761 * but some pmap modules barf if they are.
762 */
763 #if defined(__LP64__)
764 pmap_pre_expand(kernel_pmap, vaddr);
765 #endif
766
767 pmap_enter(kernel_pmap, vaddr, phys_page,
768 VM_PROT_READ|VM_PROT_WRITE,
769 VM_WIMG_USE_DEFAULT, FALSE);
770 /*
771 * Account for newly stolen memory
772 */
773 vm_page_wire_count++;
774
775 }
776
777 return (void *) addr;
778 }
779
780 void
781 pmap_startup(
782 vm_offset_t *startp,
783 vm_offset_t *endp)
784 {
785 unsigned int i, npages, pages_initialized, fill, fillval;
786 ppnum_t phys_page;
787 addr64_t tmpaddr;
788
789 /*
790 * We calculate how many page frames we will have
791 * and then allocate the page structures in one chunk.
792 */
793
794 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
795 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
796 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many pages fit, counting the space needed for their vm_page_t structures */
797
798 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
799
800 /*
801 * Initialize the page frames.
802 */
803 for (i = 0, pages_initialized = 0; i < npages; i++) {
804 if (!pmap_next_page(&phys_page))
805 break;
806 if (pages_initialized == 0 || phys_page < vm_page_lowest)
807 vm_page_lowest = phys_page;
808
809 vm_page_init(&vm_pages[i], phys_page, FALSE);
810 vm_page_pages++;
811 pages_initialized++;
812 }
813 vm_pages_count = pages_initialized;
814
815 /*
816 * Check if we want to initialize pages to a known value
817 */
818 fill = 0; /* Assume no fill */
819 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
820
821 // -debug code remove
822 if (2 == vm_himemory_mode) {
823 // free low -> high so high is preferred
824 for (i = 1; i <= pages_initialized; i++) {
825 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
826 vm_page_release(&vm_pages[i - 1]);
827 }
828 }
829 else
830 // debug code remove-
831
832 /*
833 * Release pages in reverse order so that physical pages
834 * initially get allocated in ascending addresses. This keeps
835 * the devices (which must address physical memory) happy if
836 * they require several consecutive pages.
837 */
838 for (i = pages_initialized; i > 0; i--) {
839 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
840 vm_page_release(&vm_pages[i - 1]);
841 }
842
843 #if 0
844 {
845 vm_page_t xx, xxo, xxl;
846 int i, j, k, l;
847
848 j = 0; /* (BRINGUP) */
849 xxl = 0;
850
851 for( i = 0; i < vm_colors; i++ ) {
852 queue_iterate(&vm_page_queue_free[i],
853 xx,
854 vm_page_t,
855 pageq) { /* BRINGUP */
856 j++; /* (BRINGUP) */
857 if(j > vm_page_free_count) { /* (BRINGUP) */
858 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
859 }
860
861 l = vm_page_free_count - j; /* (BRINGUP) */
862 k = 0; /* (BRINGUP) */
863
864 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
865
866 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
867 k++;
868 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
869 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
870 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
871 }
872 }
873
874 xxl = xx;
875 }
876 }
877
878 if(j != vm_page_free_count) { /* (BRINGUP) */
879 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
880 }
881 }
882 #endif
883
884
885 /*
886 * We have to re-align virtual_space_start,
887 * because pmap_steal_memory has been using it.
888 */
889
890 virtual_space_start = round_page(virtual_space_start);
891
892 *startp = virtual_space_start;
893 *endp = virtual_space_end;
894 }
895 #endif /* MACHINE_PAGES */
896
897 /*
898 * Routine: vm_page_module_init
899 * Purpose:
900 * Second initialization pass, to be done after
901 * the basic VM system is ready.
902 */
903 void
904 vm_page_module_init(void)
905 {
906 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
907 0, PAGE_SIZE, "vm pages");
908
909 #if ZONE_DEBUG
910 zone_debug_disable(vm_page_zone);
911 #endif /* ZONE_DEBUG */
912
913 zone_change(vm_page_zone, Z_EXPAND, FALSE);
914 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
915 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
916
917 /*
918 * Adjust zone statistics to account for the real pages allocated
919 * in vm_page_create(). [Q: is this really what we want?]
920 */
921 vm_page_zone->count += vm_page_pages;
922 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
923
924 lck_mtx_init(&vm_page_alloc_lock, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
925 }
926
927 /*
928 * Routine: vm_page_create
929 * Purpose:
930 * After the VM system is up, machine-dependent code
931 * may stumble across more physical memory. For example,
932 * memory that it was reserving for a frame buffer.
933 * vm_page_create turns this memory into available pages.
934 */
935
936 void
937 vm_page_create(
938 ppnum_t start,
939 ppnum_t end)
940 {
941 ppnum_t phys_page;
942 vm_page_t m;
943
944 for (phys_page = start;
945 phys_page < end;
946 phys_page++) {
947 while ((m = (vm_page_t) vm_page_grab_fictitious())
948 == VM_PAGE_NULL)
949 vm_page_more_fictitious();
950
951 vm_page_init(m, phys_page, FALSE);
952 pmap_clear_noencrypt(phys_page);
953 vm_page_pages++;
954 vm_page_release(m);
955 }
956 }
957
958 /*
959 * vm_page_hash:
960 *
961 * Distributes the object/offset key pair among hash buckets.
962 *
963 * NOTE: The bucket count must be a power of 2
964 */
965 #define vm_page_hash(object, offset) (\
966 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
967 & vm_page_hash_mask)
968
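/*
 * Usage sketch (as done by vm_page_insert_internal, vm_page_replace,
 * vm_page_remove and vm_page_lookup below): the hash selects a bucket,
 * and BUCKETS_PER_LOCK consecutive buckets share one spin lock:
 *
 *	hash_id     = vm_page_hash(object, offset);
 *	bucket      = &vm_page_buckets[hash_id];
 *	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 */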
969
970 /*
971 * vm_page_insert: [ internal use only ]
972 *
973 * Inserts the given mem entry into the object/object-page
974 * table and object list.
975 *
976 * The object must be locked.
977 */
978 void
979 vm_page_insert(
980 vm_page_t mem,
981 vm_object_t object,
982 vm_object_offset_t offset)
983 {
984 vm_page_insert_internal(mem, object, offset, FALSE, TRUE);
985 }
986
987 void
988 vm_page_insert_internal(
989 vm_page_t mem,
990 vm_object_t object,
991 vm_object_offset_t offset,
992 boolean_t queues_lock_held,
993 boolean_t insert_in_hash)
994 {
995 vm_page_bucket_t *bucket;
996 lck_spin_t *bucket_lock;
997 int hash_id;
998
999 XPR(XPR_VM_PAGE,
1000 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1001 object, offset, mem, 0,0);
1002
1003 VM_PAGE_CHECK(mem);
1004
1005 if (object == vm_submap_object) {
1006 /* the vm_submap_object is only a placeholder for submaps */
1007 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1008 }
1009
1010 vm_object_lock_assert_exclusive(object);
1011 #if DEBUG
1012 lck_mtx_assert(&vm_page_queue_lock,
1013 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1014 : LCK_MTX_ASSERT_NOTOWNED);
1015 #endif /* DEBUG */
1016
1017 if (insert_in_hash == TRUE) {
1018 #if DEBUG
1019 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1020 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1021 "already in (obj=%p,off=0x%llx)",
1022 mem, object, offset, mem->object, mem->offset);
1023 #endif
1024 assert(!object->internal || offset < object->size);
1025
1026 /* only insert "pageout" pages into "pageout" objects,
1027 * and normal pages into normal objects */
1028 assert(object->pageout == mem->pageout);
1029
1030 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1031
1032 /*
1033 * Record the object/offset pair in this page
1034 */
1035
1036 mem->object = object;
1037 mem->offset = offset;
1038
1039 /*
1040 * Insert it into the object_object/offset hash table
1041 */
1042 hash_id = vm_page_hash(object, offset);
1043 bucket = &vm_page_buckets[hash_id];
1044 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1045
1046 lck_spin_lock(bucket_lock);
1047
1048 mem->next = bucket->pages;
1049 bucket->pages = mem;
1050 #if MACH_PAGE_HASH_STATS
1051 if (++bucket->cur_count > bucket->hi_count)
1052 bucket->hi_count = bucket->cur_count;
1053 #endif /* MACH_PAGE_HASH_STATS */
1054
1055 lck_spin_unlock(bucket_lock);
1056 }
1057 /*
1058 * Now link into the object's list of backed pages.
1059 */
1060
1061 VM_PAGE_INSERT(mem, object);
1062 mem->tabled = TRUE;
1063
1064 /*
1065 * Show that the object has one more resident page.
1066 */
1067
1068 object->resident_page_count++;
1069 if (VM_PAGE_WIRED(mem)) {
1070 object->wired_page_count++;
1071 }
1072 assert(object->resident_page_count >= object->wired_page_count);
1073
1074 assert(!mem->reusable);
1075
1076 if (object->purgable == VM_PURGABLE_VOLATILE) {
1077 if (VM_PAGE_WIRED(mem)) {
1078 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1079 } else {
1080 OSAddAtomic(1, &vm_page_purgeable_count);
1081 }
1082 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1083 mem->throttled) {
1084 /*
1085 * This page belongs to a purged VM object but hasn't
1086 * been purged (because it was "busy").
1087 * It's in the "throttled" queue and hence not
1088 * visible to vm_pageout_scan(). Move it to a pageable
1089 * queue, so that it can eventually be reclaimed, instead
1090 * of lingering in the "empty" object.
1091 */
1092 if (queues_lock_held == FALSE)
1093 vm_page_lockspin_queues();
1094 vm_page_deactivate(mem);
1095 if (queues_lock_held == FALSE)
1096 vm_page_unlock_queues();
1097 }
1098 }
1099
1100 /*
1101 * vm_page_replace:
1102 *
1103 * Exactly like vm_page_insert, except that we first
1104 * remove any existing page at the given offset in object.
1105 *
1106 * The object must be locked.
1107 */
1108 void
1109 vm_page_replace(
1110 register vm_page_t mem,
1111 register vm_object_t object,
1112 register vm_object_offset_t offset)
1113 {
1114 vm_page_bucket_t *bucket;
1115 vm_page_t found_m = VM_PAGE_NULL;
1116 lck_spin_t *bucket_lock;
1117 int hash_id;
1118
1119 VM_PAGE_CHECK(mem);
1120 vm_object_lock_assert_exclusive(object);
1121 #if DEBUG
1122 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1123 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1124 "already in (obj=%p,off=0x%llx)",
1125 mem, object, offset, mem->object, mem->offset);
1126 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1127 #endif
1128 /*
1129 * Record the object/offset pair in this page
1130 */
1131
1132 mem->object = object;
1133 mem->offset = offset;
1134
1135 /*
1136 * Insert it into the object_object/offset hash table,
1137 * replacing any page that might have been there.
1138 */
1139
1140 hash_id = vm_page_hash(object, offset);
1141 bucket = &vm_page_buckets[hash_id];
1142 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1143
1144 lck_spin_lock(bucket_lock);
1145
1146 if (bucket->pages) {
1147 vm_page_t *mp = &bucket->pages;
1148 vm_page_t m = *mp;
1149
1150 do {
1151 if (m->object == object && m->offset == offset) {
1152 /*
1153 * Remove old page from hash list
1154 */
1155 *mp = m->next;
1156
1157 found_m = m;
1158 break;
1159 }
1160 mp = &m->next;
1161 } while ((m = *mp));
1162
1163 mem->next = bucket->pages;
1164 } else {
1165 mem->next = VM_PAGE_NULL;
1166 }
1167 /*
1168 * insert new page at head of hash list
1169 */
1170 bucket->pages = mem;
1171
1172 lck_spin_unlock(bucket_lock);
1173
1174 if (found_m) {
1175 /*
1176 * there was already a page at the specified
1177 * offset for this object... remove it from
1178 * the object and free it back to the free list
1179 */
1180 vm_page_free_unlocked(found_m, FALSE);
1181 }
1182 vm_page_insert_internal(mem, object, offset, FALSE, FALSE);
1183 }
1184
1185 /*
1186 * vm_page_remove: [ internal use only ]
1187 *
1188 * Removes the given mem entry from the object/offset-page
1189 * table and the object page list.
1190 *
1191 * The object must be locked.
1192 */
1193
1194 void
1195 vm_page_remove(
1196 vm_page_t mem,
1197 boolean_t remove_from_hash)
1198 {
1199 vm_page_bucket_t *bucket;
1200 vm_page_t this;
1201 lck_spin_t *bucket_lock;
1202 int hash_id;
1203
1204 XPR(XPR_VM_PAGE,
1205 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1206 mem->object, mem->offset,
1207 mem, 0,0);
1208
1209 vm_object_lock_assert_exclusive(mem->object);
1210 assert(mem->tabled);
1211 assert(!mem->cleaning);
1212 VM_PAGE_CHECK(mem);
1213
1214 if (remove_from_hash == TRUE) {
1215 /*
1216 * Remove from the object_object/offset hash table
1217 */
1218 hash_id = vm_page_hash(mem->object, mem->offset);
1219 bucket = &vm_page_buckets[hash_id];
1220 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1221
1222 lck_spin_lock(bucket_lock);
1223
1224 if ((this = bucket->pages) == mem) {
1225 /* optimize for common case */
1226
1227 bucket->pages = mem->next;
1228 } else {
1229 vm_page_t *prev;
1230
1231 for (prev = &this->next;
1232 (this = *prev) != mem;
1233 prev = &this->next)
1234 continue;
1235 *prev = this->next;
1236 }
1237 #if MACH_PAGE_HASH_STATS
1238 bucket->cur_count--;
1239 #endif /* MACH_PAGE_HASH_STATS */
1240
1241 lck_spin_unlock(bucket_lock);
1242 }
1243 /*
1244 * Now remove from the object's list of backed pages.
1245 */
1246
1247 VM_PAGE_REMOVE(mem);
1248
1249 /*
1250 * And show that the object has one fewer resident
1251 * page.
1252 */
1253
1254 assert(mem->object->resident_page_count > 0);
1255 mem->object->resident_page_count--;
1256 if (VM_PAGE_WIRED(mem)) {
1257 assert(mem->object->wired_page_count > 0);
1258 mem->object->wired_page_count--;
1259 }
1260 assert(mem->object->resident_page_count >=
1261 mem->object->wired_page_count);
1262 if (mem->reusable) {
1263 assert(mem->object->reusable_page_count > 0);
1264 mem->object->reusable_page_count--;
1265 assert(mem->object->reusable_page_count <=
1266 mem->object->resident_page_count);
1267 mem->reusable = FALSE;
1268 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1269 vm_page_stats_reusable.reused_remove++;
1270 } else if (mem->object->all_reusable) {
1271 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1272 vm_page_stats_reusable.reused_remove++;
1273 }
1274
1275 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1276 if (VM_PAGE_WIRED(mem)) {
1277 assert(vm_page_purgeable_wired_count > 0);
1278 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1279 } else {
1280 assert(vm_page_purgeable_count > 0);
1281 OSAddAtomic(-1, &vm_page_purgeable_count);
1282 }
1283 }
1284 mem->tabled = FALSE;
1285 mem->object = VM_OBJECT_NULL;
1286 mem->offset = (vm_object_offset_t) -1;
1287 }
1288
1289
1290 /*
1291 * vm_page_lookup:
1292 *
1293 * Returns the page associated with the object/offset
1294 * pair specified; if none is found, VM_PAGE_NULL is returned.
1295 *
1296 * The object must be locked. No side effects.
1297 */
1298
1299 unsigned long vm_page_lookup_hint = 0;
1300 unsigned long vm_page_lookup_hint_next = 0;
1301 unsigned long vm_page_lookup_hint_prev = 0;
1302 unsigned long vm_page_lookup_hint_miss = 0;
1303 unsigned long vm_page_lookup_bucket_NULL = 0;
1304 unsigned long vm_page_lookup_miss = 0;
1305
1306
1307 vm_page_t
1308 vm_page_lookup(
1309 vm_object_t object,
1310 vm_object_offset_t offset)
1311 {
1312 vm_page_t mem;
1313 vm_page_bucket_t *bucket;
1314 queue_entry_t qe;
1315 lck_spin_t *bucket_lock;
1316 int hash_id;
1317
1318 vm_object_lock_assert_held(object);
1319 mem = object->memq_hint;
1320
1321 if (mem != VM_PAGE_NULL) {
1322 assert(mem->object == object);
1323
1324 if (mem->offset == offset) {
1325 vm_page_lookup_hint++;
1326 return mem;
1327 }
1328 qe = queue_next(&mem->listq);
1329
1330 if (! queue_end(&object->memq, qe)) {
1331 vm_page_t next_page;
1332
1333 next_page = (vm_page_t) qe;
1334 assert(next_page->object == object);
1335
1336 if (next_page->offset == offset) {
1337 vm_page_lookup_hint_next++;
1338 object->memq_hint = next_page; /* new hint */
1339 return next_page;
1340 }
1341 }
1342 qe = queue_prev(&mem->listq);
1343
1344 if (! queue_end(&object->memq, qe)) {
1345 vm_page_t prev_page;
1346
1347 prev_page = (vm_page_t) qe;
1348 assert(prev_page->object == object);
1349
1350 if (prev_page->offset == offset) {
1351 vm_page_lookup_hint_prev++;
1352 object->memq_hint = prev_page; /* new hint */
1353 return prev_page;
1354 }
1355 }
1356 }
1357 /*
1358 * Search the hash table for this object/offset pair
1359 */
1360 hash_id = vm_page_hash(object, offset);
1361 bucket = &vm_page_buckets[hash_id];
1362
1363 /*
1364 * since we hold the object lock, we are guaranteed that no
1365 * new pages can be inserted into this object... this in turn
1366 * guarantees that the page we're looking for can't exist
1367 * if the bucket it hashes to is currently NULL even when looked
1368 * at outside the scope of the hash bucket lock... this is a
1369 * really cheap optimization to avoid taking the lock
1370 */
1371 if (bucket->pages == VM_PAGE_NULL) {
1372 vm_page_lookup_bucket_NULL++;
1373
1374 return (VM_PAGE_NULL);
1375 }
1376 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1377
1378 lck_spin_lock(bucket_lock);
1379
1380 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1381 VM_PAGE_CHECK(mem);
1382 if ((mem->object == object) && (mem->offset == offset))
1383 break;
1384 }
1385 lck_spin_unlock(bucket_lock);
1386
1387 if (mem != VM_PAGE_NULL) {
1388 if (object->memq_hint != VM_PAGE_NULL) {
1389 vm_page_lookup_hint_miss++;
1390 }
1391 assert(mem->object == object);
1392 object->memq_hint = mem;
1393 } else
1394 vm_page_lookup_miss++;
1395
1396 return(mem);
1397 }
1398
1399
1400 /*
1401 * vm_page_rename:
1402 *
1403 * Move the given memory entry from its
1404 * current object to the specified target object/offset.
1405 *
1406 * The object must be locked.
1407 */
1408 void
1409 vm_page_rename(
1410 register vm_page_t mem,
1411 register vm_object_t new_object,
1412 vm_object_offset_t new_offset,
1413 boolean_t encrypted_ok)
1414 {
1415 assert(mem->object != new_object);
1416
1417 /*
1418 * ENCRYPTED SWAP:
1419 * The encryption key is based on the page's memory object
1420 * (aka "pager") and paging offset. Moving the page to
1421 * another VM object changes its "pager" and "paging_offset"
1422 * so it has to be decrypted first, or we would lose the key.
1423 *
1424 * One exception is VM object collapsing, where we transfer pages
1425 * from one backing object to its parent object. This operation also
1426 * transfers the paging information, so the <pager,paging_offset> info
1427 * should remain consistent. The caller (vm_object_do_collapse())
1428 * sets "encrypted_ok" in this case.
1429 */
1430 if (!encrypted_ok && mem->encrypted) {
1431 panic("vm_page_rename: page %p is encrypted\n", mem);
1432 }
1433
1434 XPR(XPR_VM_PAGE,
1435 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1436 new_object, new_offset,
1437 mem, 0,0);
1438
1439 /*
1440 * Changes to mem->object require the page lock because
1441 * the pageout daemon uses that lock to get the object.
1442 */
1443 vm_page_lockspin_queues();
1444
1445 vm_page_remove(mem, TRUE);
1446 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE);
1447
1448 vm_page_unlock_queues();
1449 }
1450
1451 /*
1452 * vm_page_init:
1453 *
1454 * Initialize the fields in a new page.
1455 * This takes a structure with random values and initializes it
1456 * so that it can be given to vm_page_release or vm_page_insert.
1457 */
1458 void
1459 vm_page_init(
1460 vm_page_t mem,
1461 ppnum_t phys_page,
1462 boolean_t lopage)
1463 {
1464 assert(phys_page);
1465
1466 *mem = vm_page_template;
1467 mem->phys_page = phys_page;
1468 mem->lopage = lopage;
1469 }
1470
1471 /*
1472 * vm_page_grab_fictitious:
1473 *
1474 * Remove a fictitious page from the free list.
1475 * Returns VM_PAGE_NULL if there are no free pages.
1476 */
1477 int c_vm_page_grab_fictitious = 0;
1478 int c_vm_page_release_fictitious = 0;
1479 int c_vm_page_more_fictitious = 0;
1480
1481 extern vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
1482
1483 vm_page_t
1484 vm_page_grab_fictitious_common(
1485 ppnum_t phys_addr)
1486 {
1487 register vm_page_t m;
1488
1489 m = (vm_page_t)zget(vm_page_zone);
1490 if (m) {
1491 vm_page_init(m, phys_addr, FALSE);
1492 m->fictitious = TRUE;
1493 }
1494
1495 c_vm_page_grab_fictitious++;
1496 return m;
1497 }
1498
1499 vm_page_t
1500 vm_page_grab_fictitious(void)
1501 {
1502 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1503 }
1504
1505 vm_page_t
1506 vm_page_grab_guard(void)
1507 {
1508 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1509 }
1510
1511 /*
1512 * vm_page_release_fictitious:
1513 *
1514 * Release a fictitious page to the free list.
1515 */
1516
1517 void
1518 vm_page_release_fictitious(
1519 register vm_page_t m)
1520 {
1521 assert(!m->free);
1522 assert(m->busy);
1523 assert(m->fictitious);
1524 assert(m->phys_page == vm_page_fictitious_addr ||
1525 m->phys_page == vm_page_guard_addr);
1526
1527 c_vm_page_release_fictitious++;
1528 #if DEBUG
1529 if (m->free)
1530 panic("vm_page_release_fictitious");
1531 #endif
1532 m->free = TRUE;
1533 zfree(vm_page_zone, m);
1534 }
1535
1536 /*
1537 * vm_page_more_fictitious:
1538 *
1539 * Add more fictitious pages to the free list.
1540 * Allowed to block. This routine is way intimate
1541 * with the zones code, for several reasons:
1542 * 1. we need to carve some page structures out of physical
1543 * memory before zones work, so they _cannot_ come from
1544 * the zone_map.
1545 * 2. the zone needs to be collectable in order to prevent
1546 * growth without bound. These structures are used by
1547 * the device pager (by the hundreds and thousands), as
1548 * private pages for pageout, and as blocking pages for
1549 * pagein. Temporary bursts in demand should not result in
1550 * permanent allocation of a resource.
1551 * 3. To smooth allocation humps, we allocate single pages
1552 * with kernel_memory_allocate(), and cram them into the
1553 * zone. This also allows us to initialize the vm_page_t's
1554 * on the way into the zone, so that zget() always returns
1555 * an initialized structure. The zone free element pointer
1556 * and the free page pointer are both the first item in the
1557 * vm_page_t.
1558 * 4. By having the pages in the zone pre-initialized, we need
1559 * not keep 2 levels of lists. The garbage collector simply
1560 * scans our list, and reduces physical memory usage as it
1561 * sees fit.
1562 */
1563
1564 void vm_page_more_fictitious(void)
1565 {
1566 register vm_page_t m;
1567 vm_offset_t addr;
1568 kern_return_t retval;
1569 int i;
1570
1571 c_vm_page_more_fictitious++;
1572
1573 /*
1574 * Allocate a single page from the zone_map. Do not wait if no physical
1575 * pages are immediately available, and do not zero the space. We need
1576 * our own blocking lock here to prevent having multiple,
1577 * simultaneous requests from piling up on the zone_map lock. Exactly
1578 * one (of our) threads should be potentially waiting on the map lock.
1579 * If winner is not vm-privileged, then the page allocation will fail,
1580 * and it will temporarily block here in the vm_page_wait().
1581 */
1582 lck_mtx_lock(&vm_page_alloc_lock);
1583 /*
1584 * If another thread allocated space, just bail out now.
1585 */
1586 if (zone_free_count(vm_page_zone) > 5) {
1587 /*
1588 * The number "5" is a small number that is larger than the
1589 * number of fictitious pages that any single caller will
1590 * attempt to allocate. Otherwise, a thread will attempt to
1591 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1592 * release all of the resources and locks already acquired,
1593 * and then call this routine. This routine finds the pages
1594 * that the caller released, so fails to allocate new space.
1595 * The process repeats infinitely. The largest known number
1596 * of fictitious pages required in this manner is 2. 5 is
1597 * simply a somewhat larger number.
1598 */
1599 lck_mtx_unlock(&vm_page_alloc_lock);
1600 return;
1601 }
1602
1603 retval = kernel_memory_allocate(zone_map,
1604 &addr, PAGE_SIZE, VM_PROT_ALL,
1605 KMA_KOBJECT|KMA_NOPAGEWAIT);
1606 if (retval != KERN_SUCCESS) {
1607 /*
1608 * No page was available. Tell the pageout daemon, drop the
1609 * lock to give another thread a chance at it, and
1610 * wait for the pageout daemon to make progress.
1611 */
1612 lck_mtx_unlock(&vm_page_alloc_lock);
1613 vm_page_wait(THREAD_UNINT);
1614 return;
1615 }
1616 /*
1617 * Initialize as many vm_page_t's as will fit on this page. This
1618 * depends on the zone code disturbing ONLY the first item of
1619 * each zone element.
1620 */
1621 m = (vm_page_t)addr;
1622 for (i = PAGE_SIZE/sizeof(struct vm_page); i > 0; i--) {
1623 vm_page_init(m, vm_page_fictitious_addr, FALSE);
1624 m->fictitious = TRUE;
1625 m++;
1626 }
1627 zcram(vm_page_zone, (void *) addr, PAGE_SIZE);
1628 lck_mtx_unlock(&vm_page_alloc_lock);
1629 }
1630
1631
1632 /*
1633 * vm_pool_low():
1634 *
1635 * Return true if it is not likely that a non-vm_privileged thread
1636 * can get memory without blocking. Advisory only, since the
1637 * situation may change under us.
1638 */
1639 int
1640 vm_pool_low(void)
1641 {
1642 /* No locking, at worst we will fib. */
1643 return( vm_page_free_count <= vm_page_free_reserved );
1644 }
1645
1646
1647
1648 /*
1649 * this is an interface to support bring-up of drivers
1650 * on platforms with physical memory > 4G...
1651 */
1652 int vm_himemory_mode = 0;
1653
1654
1655 /*
1656 * this interface exists to support hardware controllers
1657 * incapable of generating DMAs with more than 32 bits
1658 * of address on platforms with physical memory > 4G...
1659 */
1660 unsigned int vm_lopages_allocated_q = 0;
1661 unsigned int vm_lopages_allocated_cpm_success = 0;
1662 unsigned int vm_lopages_allocated_cpm_failed = 0;
1663 queue_head_t vm_lopage_queue_free;
1664
1665 vm_page_t
1666 vm_page_grablo(void)
1667 {
1668 vm_page_t mem;
1669
1670 if (vm_lopage_needed == FALSE)
1671 return (vm_page_grab());
1672
1673 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1674
1675 if ( !queue_empty(&vm_lopage_queue_free)) {
1676 queue_remove_first(&vm_lopage_queue_free,
1677 mem,
1678 vm_page_t,
1679 pageq);
1680 assert(vm_lopage_free_count);
1681
1682 vm_lopage_free_count--;
1683 vm_lopages_allocated_q++;
1684
1685 if (vm_lopage_free_count < vm_lopage_lowater)
1686 vm_lopage_refill = TRUE;
1687
1688 lck_mtx_unlock(&vm_page_queue_free_lock);
1689 } else {
1690 lck_mtx_unlock(&vm_page_queue_free_lock);
1691
1692 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1693
1694 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1695 vm_lopages_allocated_cpm_failed++;
1696 lck_mtx_unlock(&vm_page_queue_free_lock);
1697
1698 return (VM_PAGE_NULL);
1699 }
1700 mem->busy = TRUE;
1701
1702 vm_page_lockspin_queues();
1703
1704 mem->gobbled = FALSE;
1705 vm_page_gobble_count--;
1706 vm_page_wire_count--;
1707
1708 vm_lopages_allocated_cpm_success++;
1709 vm_page_unlock_queues();
1710 }
1711 assert(mem->busy);
1712 assert(!mem->free);
1713 assert(!mem->pmapped);
1714 assert(!mem->wpmapped);
1715
1716 mem->pageq.next = NULL;
1717 mem->pageq.prev = NULL;
1718
1719 return (mem);
1720 }
1721
1722 /*
1723 * vm_page_grab:
1724 *
1725 * first try to grab a page from the per-cpu free list...
1726 * this must be done while pre-emption is disabled... if
1727 * a page is available, we're done...
1728 * if no page is available, grab the vm_page_queue_free_lock
1729 * and see if current number of free pages would allow us
1730 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1731 * if there are pages available, disable preemption and
1732 * recheck the state of the per-cpu free list... we could
1733 * have been preempted and moved to a different cpu, or
1734 * some other thread could have re-filled it... if still
1735 * empty, figure out how many pages we can steal from the
1736 * global free queue and move to the per-cpu queue...
1737 * return 1 of these pages when done... only wakeup the
1738 * pageout_scan thread if we moved pages from the global
1739 * list... no need for the wakeup if we've satisfied the
1740 * request from the per-cpu queue.
1741 */
1742
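/*
 * Number of per-color groups of pages pulled from the global free
 * queue onto the per-cpu free list in one refill: vm_page_grab()
 * below steals up to COLOR_GROUPS_TO_STEAL * vm_colors pages,
 * capped so the reserved pool is not invaded.
 */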
1743 #define COLOR_GROUPS_TO_STEAL 4
1744
1745
1746 vm_page_t
1747 vm_page_grab( void )
1748 {
1749 vm_page_t mem;
1750
1751
1752 disable_preemption();
1753
1754 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1755 return_page_from_cpu_list:
1756 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1757 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1758 mem->pageq.next = NULL;
1759
1760 enable_preemption();
1761
1762 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1763 assert(mem->tabled == FALSE);
1764 assert(mem->object == VM_OBJECT_NULL);
1765 assert(!mem->laundry);
1766 assert(!mem->free);
1767 assert(pmap_verify_free(mem->phys_page));
1768 assert(mem->busy);
1769 assert(!mem->encrypted);
1770 assert(!mem->pmapped);
1771 assert(!mem->wpmapped);
1772
1773 return mem;
1774 }
1775 enable_preemption();
1776
1777
1778 /*
1779 * Optionally produce warnings if the wire or gobble
1780 * counts exceed some threshold.
1781 */
1782 if (vm_page_wire_count_warning > 0
1783 && vm_page_wire_count >= vm_page_wire_count_warning) {
1784 printf("mk: vm_page_grab(): high wired page count of %d\n",
1785 vm_page_wire_count);
1786 assert(vm_page_wire_count < vm_page_wire_count_warning);
1787 }
1788 if (vm_page_gobble_count_warning > 0
1789 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1790 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1791 vm_page_gobble_count);
1792 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1793 }
1794
1795 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1796
1797 /*
1798 * Only let privileged threads (involved in pageout)
1799 * dip into the reserved pool.
1800 */
1801 if ((vm_page_free_count < vm_page_free_reserved) &&
1802 !(current_thread()->options & TH_OPT_VMPRIV)) {
1803 lck_mtx_unlock(&vm_page_queue_free_lock);
1804 mem = VM_PAGE_NULL;
1805 }
1806 else {
1807 vm_page_t head;
1808 vm_page_t tail;
1809 unsigned int pages_to_steal;
1810 unsigned int color;
1811
1812 while ( vm_page_free_count == 0 ) {
1813
1814 lck_mtx_unlock(&vm_page_queue_free_lock);
1815 /*
1816 * must be a privileged thread to be
1817 * in this state since a non-privileged
1818 * thread would have bailed if we were
1819 * under the vm_page_free_reserved mark
1820 */
1821 VM_PAGE_WAIT();
1822 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1823 }
1824
1825 disable_preemption();
1826
1827 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1828 lck_mtx_unlock(&vm_page_queue_free_lock);
1829
1830 /*
1831 * we got preempted and moved to another processor
1832 * or we got preempted and someone else ran and filled the cache
1833 */
1834 goto return_page_from_cpu_list;
1835 }
1836 if (vm_page_free_count <= vm_page_free_reserved)
1837 pages_to_steal = 1;
1838 else {
1839 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1840
1841 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1842 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1843 }
1844 color = PROCESSOR_DATA(current_processor(), start_color);
1845 head = tail = NULL;
1846
1847 while (pages_to_steal--) {
1848 if (--vm_page_free_count < vm_page_free_count_minimum)
1849 vm_page_free_count_minimum = vm_page_free_count;
1850
1851 while (queue_empty(&vm_page_queue_free[color]))
1852 color = (color + 1) & vm_color_mask;
1853
1854 queue_remove_first(&vm_page_queue_free[color],
1855 mem,
1856 vm_page_t,
1857 pageq);
1858 mem->pageq.next = NULL;
1859 mem->pageq.prev = NULL;
1860
1861 color = (color + 1) & vm_color_mask;
1862
1863 if (head == NULL)
1864 head = mem;
1865 else
1866 tail->pageq.next = (queue_t)mem;
1867 tail = mem;
1868
1869 mem->pageq.prev = NULL;
1870 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1871 assert(mem->tabled == FALSE);
1872 assert(mem->object == VM_OBJECT_NULL);
1873 assert(!mem->laundry);
1874 assert(mem->free);
1875 mem->free = FALSE;
1876
1877 assert(pmap_verify_free(mem->phys_page));
1878 assert(mem->busy);
1879 assert(!mem->free);
1880 assert(!mem->encrypted);
1881 assert(!mem->pmapped);
1882 assert(!mem->wpmapped);
1883 }
1884 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
1885 PROCESSOR_DATA(current_processor(), start_color) = color;
1886
1887 /*
1888 * satisfy this request
1889 */
1890 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1891 mem = head;
1892 mem->pageq.next = NULL;
1893
1894 lck_mtx_unlock(&vm_page_queue_free_lock);
1895
1896 enable_preemption();
1897 }
1898 /*
1899 * Decide if we should poke the pageout daemon.
1900 * We do this if the free count is less than the low
1901 * water mark, or if the free count is less than the high
1902 * water mark (but above the low water mark) and the inactive
1903 * count is less than its target.
1904 *
1905 * We don't have the counts locked ... if they change a little,
1906 * it doesn't really matter.
1907 */
1908 if ((vm_page_free_count < vm_page_free_min) ||
1909 ((vm_page_free_count < vm_page_free_target) &&
1910 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
1911 thread_wakeup((event_t) &vm_page_free_wanted);
1912
1913 #if CONFIG_EMBEDDED
1914 {
1915 int percent_avail;
1916
1917 /*
1918 * Decide if we need to poke the memorystatus notification thread.
1919 */
1920 percent_avail =
1921 (vm_page_active_count + vm_page_inactive_count +
1922 vm_page_speculative_count + vm_page_free_count +
1923 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
1924 atop_64(max_mem);
1925 if (percent_avail <= (kern_memorystatus_level - 5)) {
1926 kern_memorystatus_level = percent_avail;
1927 thread_wakeup((event_t)&kern_memorystatus_wakeup);
1928 }
1929 }
1930 #endif
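/*
 * Worked example of the calculation above (hypothetical numbers): with
 * max_mem equivalent to 262144 pages and 40000 active + 20000 inactive +
 * 2000 speculative + 3144 free pages (no purgeable credit because a
 * default pager is registered), percent_avail works out to
 * 65144 * 100 / 262144 == 24, so the notification thread is poked only
 * if kern_memorystatus_level was previously 29 or higher.
 */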
1931
1932 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
1933
1934 return mem;
1935 }
1936
1937 /*
1938 * vm_page_release:
1939 *
1940 * Return a page to the free list.
1941 */
1942
1943 void
1944 vm_page_release(
1945 register vm_page_t mem)
1946 {
1947 unsigned int color;
1948 int need_wakeup = 0;
1949 int need_priv_wakeup = 0;
1950 #if 0
1951 unsigned int pindex;
1952 phys_entry *physent;
1953
1954 physent = mapping_phys_lookup(mem->phys_page, &pindex); /* (BRINGUP) */
1955 if(physent->ppLink & ppN) { /* (BRINGUP) */
1956 panic("vm_page_release: already released - %08X %08X\n", mem, mem->phys_page);
1957 }
1958 physent->ppLink = physent->ppLink | ppN; /* (BRINGUP) */
1959 #endif
1960 assert(!mem->private && !mem->fictitious);
1961 if (vm_page_free_verify) {
1962 assert(pmap_verify_free(mem->phys_page));
1963 }
1964 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
1965
1966
1967 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1968 #if DEBUG
1969 if (mem->free)
1970 panic("vm_page_release");
1971 #endif
1972 assert(mem->busy);
1973 assert(!mem->laundry);
1974 assert(mem->object == VM_OBJECT_NULL);
1975 assert(mem->pageq.next == NULL &&
1976 mem->pageq.prev == NULL);
1977 assert(mem->listq.next == NULL &&
1978 mem->listq.prev == NULL);
1979
1980 if ((mem->lopage || vm_lopage_refill == TRUE) &&
1981 vm_lopage_free_count < vm_lopage_free_limit &&
1982 mem->phys_page < max_valid_low_ppnum) {
1983 /*
1984 * this exists to support hardware controllers
1985 * incapable of generating DMAs with more than 32 bits
1986 * of address on platforms with physical memory > 4G...
1987 */
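/*
 * Illustrative arithmetic (hypothetical device): a DMA engine limited to
 * 32-bit addresses can only reach physical pages numbered below
 * 4GB / 4KB == 0x100000, which is the kind of ceiling that
 * max_valid_low_ppnum represents.
 */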
1988 queue_enter_first(&vm_lopage_queue_free,
1989 mem,
1990 vm_page_t,
1991 pageq);
1992 vm_lopage_free_count++;
1993
1994 if (vm_lopage_free_count >= vm_lopage_free_limit)
1995 vm_lopage_refill = FALSE;
1996
1997 mem->lopage = TRUE;
1998 } else {
1999 mem->lopage = FALSE;
2000 mem->free = TRUE;
2001
2002 color = mem->phys_page & vm_color_mask;
2003 queue_enter_first(&vm_page_queue_free[color],
2004 mem,
2005 vm_page_t,
2006 pageq);
2007 vm_page_free_count++;
2008 /*
2009 * Check if we should wake up someone waiting for a page.
2010 * But don't bother waking them unless they can allocate.
2011 *
2012 * We wake up only one thread, to prevent starvation.
2013 * Because the scheduling system handles wait queues FIFO,
2014 * if we wake up all waiting threads, one greedy thread
2015 * can starve multiple well-behaved threads. When the threads
2016 * all wake up, the greedy thread runs first, grabs the page,
2017 * and waits for another page. It will be the first to run
2018 * when the next page is freed.
2019 *
2020 * However, there is a slight danger here.
2021 * The thread we wake might not use the free page.
2022 * Then the other threads could wait indefinitely
2023 * while the page goes unused. To forestall this,
2024 * the pageout daemon will keep making free pages
2025 * as long as vm_page_free_wanted is non-zero.
2026 */
2027
2028 assert(vm_page_free_count > 0);
2029 if (vm_page_free_wanted_privileged > 0) {
2030 vm_page_free_wanted_privileged--;
2031 need_priv_wakeup = 1;
2032 } else if (vm_page_free_wanted > 0 &&
2033 vm_page_free_count > vm_page_free_reserved) {
2034 vm_page_free_wanted--;
2035 need_wakeup = 1;
2036 }
2037 }
2038 lck_mtx_unlock(&vm_page_queue_free_lock);
2039
2040 if (need_priv_wakeup)
2041 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2042 else if (need_wakeup)
2043 thread_wakeup_one((event_t) &vm_page_free_count);
2044
2045 #if CONFIG_EMBEDDED
2046 {
2047 int percent_avail;
2048
2049 /*
2050 * Decide if we need to poke the memorystatus notification thread.
2051 * Locking is not a big issue, as only a single thread delivers these.
2052 */
2053 percent_avail =
2054 (vm_page_active_count + vm_page_inactive_count +
2055 vm_page_speculative_count + vm_page_free_count +
2056 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2057 atop_64(max_mem);
2058 if (percent_avail >= (kern_memorystatus_level + 5)) {
2059 kern_memorystatus_level = percent_avail;
2060 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2061 }
2062 }
2063 #endif
2064 }
2065
2066 /*
2067 * vm_page_wait:
2068 *
2069 * Wait for a page to become available.
2070 * If there are plenty of free pages, then we don't sleep.
2071 *
2072 * Returns:
2073 * TRUE: There may be another page, try again
2074 * FALSE: We were interrupted out of our wait, don't try again
2075 */
2076
2077 boolean_t
2078 vm_page_wait(
2079 int interruptible )
2080 {
2081 /*
2082 * We can't use vm_page_free_reserved to make this
2083 * determination. Consider: some thread might
2084 * need to allocate two pages. The first allocation
2085 * succeeds, the second fails. After the first page is freed,
2086 * a call to vm_page_wait must really block.
2087 */
2088 kern_return_t wait_result;
2089 int need_wakeup = 0;
2090 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2091
2092 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2093
2094 if (is_privileged && vm_page_free_count) {
2095 lck_mtx_unlock(&vm_page_queue_free_lock);
2096 return TRUE;
2097 }
2098 if (vm_page_free_count < vm_page_free_target) {
2099
2100 if (is_privileged) {
2101 if (vm_page_free_wanted_privileged++ == 0)
2102 need_wakeup = 1;
2103 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2104 } else {
2105 if (vm_page_free_wanted++ == 0)
2106 need_wakeup = 1;
2107 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2108 }
2109 lck_mtx_unlock(&vm_page_queue_free_lock);
2110 counter(c_vm_page_wait_block++);
2111
2112 if (need_wakeup)
2113 thread_wakeup((event_t)&vm_page_free_wanted);
2114
2115 if (wait_result == THREAD_WAITING)
2116 wait_result = thread_block(THREAD_CONTINUE_NULL);
2117
2118 return(wait_result == THREAD_AWAKENED);
2119 } else {
2120 lck_mtx_unlock(&vm_page_queue_free_lock);
2121 return TRUE;
2122 }
2123 }
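/*
 * Illustrative grab/wait loop (hypothetical caller, shown only to
 * demonstrate the return-value contract documented above): retry
 * vm_page_grab() until vm_page_wait() reports that the wait was broken,
 * e.g.
 *
 *	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
 *		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
 *			return KERN_ABORTED;
 *	}
 */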
2124
2125 /*
2126 * vm_page_alloc:
2127 *
2128 * Allocate and return a memory cell associated
2129 * with this VM object/offset pair.
2130 *
2131 * Object must be locked.
2132 */
2133
2134 vm_page_t
2135 vm_page_alloc(
2136 vm_object_t object,
2137 vm_object_offset_t offset)
2138 {
2139 register vm_page_t mem;
2140
2141 vm_object_lock_assert_exclusive(object);
2142 mem = vm_page_grab();
2143 if (mem == VM_PAGE_NULL)
2144 return VM_PAGE_NULL;
2145
2146 vm_page_insert(mem, object, offset);
2147
2148 return(mem);
2149 }
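/*
 * Hypothetical caller sketch (illustration only): vm_page_alloc() requires
 * the object lock to be held exclusively, so callers that may have to wait
 * for memory typically drop and retake it around the wait, e.g.
 *
 *	vm_object_lock(object);
 *	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		VM_PAGE_WAIT();
 *		vm_object_lock(object);
 *	}
 *	vm_object_unlock(object);
 */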
2150
2151 vm_page_t
2152 vm_page_alloclo(
2153 vm_object_t object,
2154 vm_object_offset_t offset)
2155 {
2156 register vm_page_t mem;
2157
2158 vm_object_lock_assert_exclusive(object);
2159 mem = vm_page_grablo();
2160 if (mem == VM_PAGE_NULL)
2161 return VM_PAGE_NULL;
2162
2163 vm_page_insert(mem, object, offset);
2164
2165 return(mem);
2166 }
2167
2168
2169 /*
2170 * vm_page_alloc_guard:
2171 *
2172 * Allocate a fictitious page which will be used
2173 * as a guard page. The page will be inserted into
2174 * the object and returned to the caller.
2175 */
2176
2177 vm_page_t
2178 vm_page_alloc_guard(
2179 vm_object_t object,
2180 vm_object_offset_t offset)
2181 {
2182 register vm_page_t mem;
2183
2184 vm_object_lock_assert_exclusive(object);
2185 mem = vm_page_grab_guard();
2186 if (mem == VM_PAGE_NULL)
2187 return VM_PAGE_NULL;
2188
2189 vm_page_insert(mem, object, offset);
2190
2191 return(mem);
2192 }
2193
2194
2195 counter(unsigned int c_laundry_pages_freed = 0;)
2196
2197 /*
2198 * vm_page_free:
2199 *
2200 * Returns the given page to the free list,
2201 * disassociating it from any VM object.
2202 *
2203 * Object and page queues must be locked prior to entry.
2204 */
2205 static void
2206 vm_page_free_prepare(
2207 register vm_page_t mem)
2208 {
2209 vm_page_free_prepare_queues(mem);
2210 vm_page_free_prepare_object(mem, TRUE);
2211 }
2212
2213
2214 void
2215 vm_page_free_prepare_queues(
2216 vm_page_t mem)
2217 {
2218 VM_PAGE_CHECK(mem);
2219 assert(!mem->free);
2220 assert(!mem->cleaning);
2221 assert(!mem->pageout);
2222 #if DEBUG
2223 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2224 if (mem->free)
2225 panic("vm_page_free: freeing page on free list\n");
2226 #endif
2227 if (mem->object) {
2228 vm_object_lock_assert_exclusive(mem->object);
2229 }
2230
2231 if (mem->laundry) {
2232 /*
2233 * We may have to free a page while it's being laundered
2234 * if we lost its pager (due to a forced unmount, for example).
2235 * We need to call vm_pageout_throttle_up() before removing
2236 * the page from its VM object, so that we can find out
2237 * which pageout queue the page is on.
2238 */
2239 vm_pageout_throttle_up(mem);
2240 counter(++c_laundry_pages_freed);
2241 }
2242 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2243
2244 if (VM_PAGE_WIRED(mem)) {
2245 if (mem->object) {
2246 assert(mem->object->wired_page_count > 0);
2247 mem->object->wired_page_count--;
2248 assert(mem->object->resident_page_count >=
2249 mem->object->wired_page_count);
2250 }
2251 if (!mem->private && !mem->fictitious)
2252 vm_page_wire_count--;
2253 mem->wire_count = 0;
2254 assert(!mem->gobbled);
2255 } else if (mem->gobbled) {
2256 if (!mem->private && !mem->fictitious)
2257 vm_page_wire_count--;
2258 vm_page_gobble_count--;
2259 }
2260 }
2261
2262
2263 void
2264 vm_page_free_prepare_object(
2265 vm_page_t mem,
2266 boolean_t remove_from_hash)
2267 {
2268 if (mem->object) {
2269 vm_object_lock_assert_exclusive(mem->object);
2270 }
2271
2272 if (mem->tabled)
2273 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2274
2275 PAGE_WAKEUP(mem); /* clears wanted */
2276
2277 if (mem->private) {
2278 mem->private = FALSE;
2279 mem->fictitious = TRUE;
2280 mem->phys_page = vm_page_fictitious_addr;
2281 }
2282 if (mem->fictitious) {
2283 /* Some of these may be unnecessary */
2284 mem->gobbled = FALSE;
2285 mem->busy = TRUE;
2286 mem->absent = FALSE;
2287 mem->error = FALSE;
2288 mem->dirty = FALSE;
2289 mem->precious = FALSE;
2290 mem->reference = FALSE;
2291 mem->encrypted = FALSE;
2292 mem->encrypted_cleaning = FALSE;
2293 mem->pmapped = FALSE;
2294 mem->wpmapped = FALSE;
2295 mem->reusable = FALSE;
2296 } else {
2297 if (mem->zero_fill == TRUE)
2298 VM_ZF_COUNT_DECR();
2299 vm_page_init(mem, mem->phys_page, mem->lopage);
2300 }
2301 }
2302
2303
2304 void
2305 vm_page_free(
2306 vm_page_t mem)
2307 {
2308 vm_page_free_prepare(mem);
2309 if (mem->fictitious) {
2310 vm_page_release_fictitious(mem);
2311 } else {
2312 vm_page_release(mem);
2313 }
2314 }
2315
2316
2317 void
2318 vm_page_free_unlocked(
2319 vm_page_t mem,
2320 boolean_t remove_from_hash)
2321 {
2322 vm_page_lockspin_queues();
2323 vm_page_free_prepare_queues(mem);
2324 vm_page_unlock_queues();
2325
2326 vm_page_free_prepare_object(mem, remove_from_hash);
2327
2328 if (mem->fictitious) {
2329 vm_page_release_fictitious(mem);
2330 } else {
2331 vm_page_release(mem);
2332 }
2333 }
2334
2335 /*
2336 * Free a list of pages. The list can be up to several hundred pages,
2337 * as blocked up by vm_pageout_scan().
2338 * The big win is not having to take the free list lock once
2339 * per page. We sort the incoming pages into n lists, one for
2340 * each color.
2341 */
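/*
 * For illustration (hypothetical values): with vm_colors == 8 and
 * vm_color_mask == 7, a page whose phys_page is 0x12345 is sorted into
 * free_list[0x12345 & 7] == free_list[5], the same per-color bucket that
 * vm_page_grab() later steals from.
 */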
2342 void
2343 vm_page_free_list(
2344 vm_page_t mem,
2345 boolean_t prepare_object)
2346 {
2347 vm_page_t nxt;
2348 int pg_count = 0;
2349 int color;
2350 int inuse_list_head = -1;
2351
2352 queue_head_t free_list[MAX_COLORS];
2353 int inuse[MAX_COLORS];
2354
2355 for (color = 0; color < (signed) vm_colors; color++) {
2356 queue_init(&free_list[color]);
2357 }
2358
2359 while (mem) {
2360 assert(!mem->inactive);
2361 assert(!mem->active);
2362 assert(!mem->throttled);
2363 assert(!mem->free);
2364 assert(!mem->speculative);
2365 assert(!VM_PAGE_WIRED(mem));
2366 assert(mem->pageq.prev == NULL);
2367
2368 nxt = (vm_page_t)(mem->pageq.next);
2369
2370 if (prepare_object == TRUE)
2371 vm_page_free_prepare_object(mem, TRUE);
2372
2373 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2374 assert(pmap_verify_free(mem->phys_page));
2375 }
2376 assert(mem->busy);
2377
2378 if (!mem->fictitious) {
2379 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2380 vm_lopage_free_count < vm_lopage_free_limit &&
2381 mem->phys_page < max_valid_low_ppnum) {
2382 mem->pageq.next = NULL;
2383 vm_page_release(mem);
2384 } else {
2385
2386 /*
2387 * IMPORTANT: we can't set the page "free" here
2388 * because that would make the page eligible for
2389 * a physically-contiguous allocation (see
2390 * vm_page_find_contiguous()) right away (we don't
2391 * hold the vm_page_queue_free lock). That would
2392 * cause trouble because the page is not actually
2393 * in the free queue yet...
2394 */
2395 color = mem->phys_page & vm_color_mask;
2396 if (queue_empty(&free_list[color])) {
2397 inuse[color] = inuse_list_head;
2398 inuse_list_head = color;
2399 }
2400 queue_enter_first(&free_list[color],
2401 mem,
2402 vm_page_t,
2403 pageq);
2404 pg_count++;
2405 }
2406 } else {
2407 assert(mem->phys_page == vm_page_fictitious_addr ||
2408 mem->phys_page == vm_page_guard_addr);
2409 vm_page_release_fictitious(mem);
2410 }
2411 mem = nxt;
2412 }
2413 if (pg_count) {
2414 unsigned int avail_free_count;
2415 unsigned int need_wakeup = 0;
2416 unsigned int need_priv_wakeup = 0;
2417
2418 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2419
2420 color = inuse_list_head;
2421
2422 while( color != -1 ) {
2423 vm_page_t first, last;
2424 vm_page_t first_free;
2425
2426 /*
2427 * Now that we hold the vm_page_queue_free lock,
2428 * it's safe to mark all pages in our local queue
2429 * as "free"...
2430 */
2431 queue_iterate(&free_list[color],
2432 mem,
2433 vm_page_t,
2434 pageq) {
2435 assert(!mem->free);
2436 assert(mem->busy);
2437 mem->free = TRUE;
2438 }
2439
2440 /*
2441 * ... and insert our local queue at the head of
2442 * the global free queue.
2443 */
2444 first = (vm_page_t) queue_first(&free_list[color]);
2445 last = (vm_page_t) queue_last(&free_list[color]);
2446 first_free = (vm_page_t) queue_first(&vm_page_queue_free[color]);
2447 if (queue_empty(&vm_page_queue_free[color])) {
2448 queue_last(&vm_page_queue_free[color]) =
2449 (queue_entry_t) last;
2450 } else {
2451 queue_prev(&first_free->pageq) =
2452 (queue_entry_t) last;
2453 }
2454 queue_first(&vm_page_queue_free[color]) =
2455 (queue_entry_t) first;
2456 queue_prev(&first->pageq) =
2457 (queue_entry_t) &vm_page_queue_free[color];
2458 queue_next(&last->pageq) =
2459 (queue_entry_t) first_free;
2460
2461 /* next color */
2462 color = inuse[color];
2463 }
2464
2465 vm_page_free_count += pg_count;
2466 avail_free_count = vm_page_free_count;
2467
2468 if (vm_page_free_wanted_privileged > 0 &&
2469 avail_free_count > 0) {
2470 if (avail_free_count < vm_page_free_wanted_privileged) {
2471 need_priv_wakeup = avail_free_count;
2472 vm_page_free_wanted_privileged -=
2473 avail_free_count;
2474 avail_free_count = 0;
2475 } else {
2476 need_priv_wakeup = vm_page_free_wanted_privileged;
2477 vm_page_free_wanted_privileged = 0;
2478 avail_free_count -=
2479 need_priv_wakeup;
2480 }
2481 }
2482
2483 if (vm_page_free_wanted > 0 &&
2484 avail_free_count > vm_page_free_reserved) {
2485 unsigned int available_pages;
2486
2487 available_pages = (avail_free_count -
2488 vm_page_free_reserved);
2489
2490 if (available_pages >= vm_page_free_wanted) {
2491 need_wakeup = vm_page_free_wanted;
2492 vm_page_free_wanted = 0;
2493 } else {
2494 need_wakeup = available_pages;
2495 vm_page_free_wanted -= available_pages;
2496 }
2497 }
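/*
 * Worked example of the accounting above (hypothetical counts): freeing
 * pg_count == 20 pages onto a list that already held 10, with
 * vm_page_free_reserved == 25, 3 privileged waiters and 12 ordinary
 * waiters: avail_free_count starts at 30, all 3 privileged waiters are
 * woken (avail drops to 27), then available_pages == 27 - 25 == 2, so
 * only 2 ordinary waiters are woken and vm_page_free_wanted drops to 10.
 */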
2498 lck_mtx_unlock(&vm_page_queue_free_lock);
2499
2500 if (need_priv_wakeup != 0) {
2501 /*
2502 * There shouldn't be that many VM-privileged threads,
2503 * so let's wake them all up, even if we don't quite
2504 * have enough pages to satisfy them all.
2505 */
2506 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2507 }
2508 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2509 /*
2510 * We don't expect to have any more waiters
2511 * after this, so let's wake them all up at
2512 * once.
2513 */
2514 thread_wakeup((event_t) &vm_page_free_count);
2515 } else for (; need_wakeup != 0; need_wakeup--) {
2516 /*
2517 * Wake up one waiter per page we just released.
2518 */
2519 thread_wakeup_one((event_t) &vm_page_free_count);
2520 }
2521 #if CONFIG_EMBEDDED
2522 {
2523 int percent_avail;
2524
2525 /*
2526 * Decide if we need to poke the memorystatus notification thread.
2527 */
2528 percent_avail =
2529 (vm_page_active_count + vm_page_inactive_count +
2530 vm_page_speculative_count + vm_page_free_count +
2531 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2532 atop_64(max_mem);
2533 if (percent_avail >= (kern_memorystatus_level + 5)) {
2534 kern_memorystatus_level = percent_avail;
2535 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2536 }
2537 }
2538 #endif
2539 }
2540 }
2541
2542
2543 /*
2544 * vm_page_wire:
2545 *
2546 * Mark this page as wired down by yet
2547 * another map, removing it from paging queues
2548 * as necessary.
2549 *
2550 * The page's object and the page queues must be locked.
2551 */
2552 void
2553 vm_page_wire(
2554 register vm_page_t mem)
2555 {
2556
2557 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2558
2559 VM_PAGE_CHECK(mem);
2560 if (mem->object) {
2561 vm_object_lock_assert_exclusive(mem->object);
2562 } else {
2563 /*
2564 * In theory, the page should be in an object before it
2565 * gets wired, since we need to hold the object lock
2566 * to update some fields in the page structure.
2567 * However, some code (i386 pmap, for example) might want
2568 * to wire a page before it gets inserted into an object.
2569 * That's somewhat OK, as long as nobody else can get to
2570 * that page and update it at the same time.
2571 */
2572 }
2573 #if DEBUG
2574 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2575 #endif
2576 if ( !VM_PAGE_WIRED(mem)) {
2577 VM_PAGE_QUEUES_REMOVE(mem);
2578
2579 if (mem->object) {
2580 mem->object->wired_page_count++;
2581 assert(mem->object->resident_page_count >=
2582 mem->object->wired_page_count);
2583 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2584 assert(vm_page_purgeable_count > 0);
2585 OSAddAtomic(-1, &vm_page_purgeable_count);
2586 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2587 }
2588 if (mem->object->all_reusable) {
2589 /*
2590 * Wired pages are not counted as "re-usable"
2591 * in "all_reusable" VM objects, so nothing
2592 * to do here.
2593 */
2594 } else if (mem->reusable) {
2595 /*
2596 * This page is not "re-usable" when it's
2597 * wired, so adjust its state and the
2598 * accounting.
2599 */
2600 vm_object_reuse_pages(mem->object,
2601 mem->offset,
2602 mem->offset+PAGE_SIZE_64,
2603 FALSE);
2604 }
2605 }
2606 assert(!mem->reusable);
2607
2608 if (!mem->private && !mem->fictitious && !mem->gobbled)
2609 vm_page_wire_count++;
2610 if (mem->gobbled)
2611 vm_page_gobble_count--;
2612 mem->gobbled = FALSE;
2613 if (mem->zero_fill == TRUE) {
2614 mem->zero_fill = FALSE;
2615 VM_ZF_COUNT_DECR();
2616 }
2617 #if CONFIG_EMBEDDED
2618 {
2619 int percent_avail;
2620
2621 /*
2622 * Decide if we need to poke the memorystatus notification thread.
2623 */
2624 percent_avail =
2625 (vm_page_active_count + vm_page_inactive_count +
2626 vm_page_speculative_count + vm_page_free_count +
2627 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2628 atop_64(max_mem);
2629 if (percent_avail <= (kern_memorystatus_level - 5)) {
2630 kern_memorystatus_level = percent_avail;
2631 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2632 }
2633 }
2634 #endif
2635 /*
2636 * ENCRYPTED SWAP:
2637 * The page could be encrypted, but
2638 * we don't have to decrypt it here
2639 * because we don't guarantee that the
2640 * data is actually valid at this point.
2641 * The page will get decrypted in
2642 * vm_fault_wire() if needed.
2643 */
2644 }
2645 assert(!mem->gobbled);
2646 mem->wire_count++;
2647 VM_PAGE_CHECK(mem);
2648 }
2649
2650 /*
2651 * vm_page_gobble:
2652 *
2653 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2654 *
2655 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2656 */
2657 void
2658 vm_page_gobble(
2659 register vm_page_t mem)
2660 {
2661 vm_page_lockspin_queues();
2662 VM_PAGE_CHECK(mem);
2663
2664 assert(!mem->gobbled);
2665 assert( !VM_PAGE_WIRED(mem));
2666
2667 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2668 if (!mem->private && !mem->fictitious)
2669 vm_page_wire_count++;
2670 }
2671 vm_page_gobble_count++;
2672 mem->gobbled = TRUE;
2673 vm_page_unlock_queues();
2674 }
2675
2676 /*
2677 * vm_page_unwire:
2678 *
2679 * Release one wiring of this page, potentially
2680 * enabling it to be paged again.
2681 *
2682 * The page's object and the page queues must be locked.
2683 */
2684 void
2685 vm_page_unwire(
2686 vm_page_t mem,
2687 boolean_t queueit)
2688 {
2689
2690 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2691
2692 VM_PAGE_CHECK(mem);
2693 assert(VM_PAGE_WIRED(mem));
2694 assert(mem->object != VM_OBJECT_NULL);
2695 #if DEBUG
2696 vm_object_lock_assert_exclusive(mem->object);
2697 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2698 #endif
2699 if (--mem->wire_count == 0) {
2700 assert(!mem->private && !mem->fictitious);
2701 vm_page_wire_count--;
2702 assert(mem->object->wired_page_count > 0);
2703 mem->object->wired_page_count--;
2704 assert(mem->object->resident_page_count >=
2705 mem->object->wired_page_count);
2706 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2707 OSAddAtomic(+1, &vm_page_purgeable_count);
2708 assert(vm_page_purgeable_wired_count > 0);
2709 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2710 }
2711 assert(!mem->laundry);
2712 assert(mem->object != kernel_object);
2713 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2714
2715 if (queueit == TRUE) {
2716 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2717 vm_page_deactivate(mem);
2718 } else {
2719 vm_page_activate(mem);
2720 }
2721 }
2722 #if CONFIG_EMBEDDED
2723 {
2724 int percent_avail;
2725
2726 /*
2727 * Decide if we need to poke the memorystatus notification thread.
2728 */
2729 percent_avail =
2730 (vm_page_active_count + vm_page_inactive_count +
2731 vm_page_speculative_count + vm_page_free_count +
2732 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
2733 atop_64(max_mem);
2734 if (percent_avail >= (kern_memorystatus_level + 5)) {
2735 kern_memorystatus_level = percent_avail;
2736 thread_wakeup((event_t)&kern_memorystatus_wakeup);
2737 }
2738 }
2739 #endif
2740 }
2741 VM_PAGE_CHECK(mem);
2742 }
2743
2744 /*
2745 * vm_page_deactivate:
2746 *
2747 * Returns the given page to the inactive list,
2748 * indicating that no physical maps have access
2749 * to this page. [Used by the physical mapping system.]
2750 *
2751 * The page queues must be locked.
2752 */
2753 void
2754 vm_page_deactivate(
2755 vm_page_t m)
2756 {
2757 vm_page_deactivate_internal(m, TRUE);
2758 }
2759
2760
2761 void
2762 vm_page_deactivate_internal(
2763 vm_page_t m,
2764 boolean_t clear_hw_reference)
2765 {
2766
2767 VM_PAGE_CHECK(m);
2768 assert(m->object != kernel_object);
2769 assert(m->phys_page != vm_page_guard_addr);
2770
2771 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2772 #if DEBUG
2773 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2774 #endif
2775 /*
2776 * This page is no longer very interesting. If it was
2777 * interesting (active or inactive/referenced), then we
2778 * clear the reference bit and (re)enter it in the
2779 * inactive queue. Note wired pages should not have
2780 * their reference bit cleared.
2781 */
2782
2783 if (m->absent && !m->unusual)
2784 panic("vm_page_deactivate: %p absent", m);
2785
2786 if (m->gobbled) { /* can this happen? */
2787 assert( !VM_PAGE_WIRED(m));
2788
2789 if (!m->private && !m->fictitious)
2790 vm_page_wire_count--;
2791 vm_page_gobble_count--;
2792 m->gobbled = FALSE;
2793 }
2794 if (m->private || (VM_PAGE_WIRED(m)))
2795 return;
2796
2797 if (!m->fictitious && !m->absent && clear_hw_reference == TRUE)
2798 pmap_clear_reference(m->phys_page);
2799
2800 m->reference = FALSE;
2801 m->no_cache = FALSE;
2802
2803 if (!m->inactive) {
2804 VM_PAGE_QUEUES_REMOVE(m);
2805
2806 assert(!m->laundry);
2807 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2808
2809 if (!IP_VALID(memory_manager_default) &&
2810 m->dirty && m->object->internal &&
2811 (m->object->purgable == VM_PURGABLE_DENY ||
2812 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2813 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2814 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2815 m->throttled = TRUE;
2816 vm_page_throttled_count++;
2817 } else {
2818 if (!m->fictitious && m->object->named && m->object->ref_count == 1) {
2819 vm_page_speculate(m, FALSE);
2820 #if DEVELOPMENT || DEBUG
2821 vm_page_speculative_recreated++;
2822 #endif
2823 return;
2824 } else {
2825 if (m->zero_fill) {
2826 queue_enter(&vm_page_queue_zf, m, vm_page_t, pageq);
2827 vm_zf_queue_count++;
2828 } else {
2829 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
2830 }
2831 }
2832 m->inactive = TRUE;
2833 if (!m->fictitious) {
2834 vm_page_inactive_count++;
2835 token_new_pagecount++;
2836 }
2837 }
2838 }
2839 }
2840
2841 /*
2842 * vm_page_activate:
2843 *
2844 * Put the specified page on the active list (if appropriate).
2845 *
2846 * The page queues must be locked.
2847 */
2848
2849 void
2850 vm_page_activate(
2851 register vm_page_t m)
2852 {
2853 VM_PAGE_CHECK(m);
2854 #ifdef FIXME_4778297
2855 assert(m->object != kernel_object);
2856 #endif
2857 assert(m->phys_page != vm_page_guard_addr);
2858 #if DEBUG
2859 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2860 #endif
2861
2862 if (m->absent && !m->unusual)
2863 panic("vm_page_activate: %p absent", m);
2864
2865 if (m->gobbled) {
2866 assert( !VM_PAGE_WIRED(m));
2867 if (!m->private && !m->fictitious)
2868 vm_page_wire_count--;
2869 vm_page_gobble_count--;
2870 m->gobbled = FALSE;
2871 }
2872 if (m->private)
2873 return;
2874
2875 #if DEBUG
2876 if (m->active)
2877 panic("vm_page_activate: already active");
2878 #endif
2879
2880 if (m->speculative) {
2881 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2882 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2883 }
2884
2885 VM_PAGE_QUEUES_REMOVE(m);
2886
2887 if ( !VM_PAGE_WIRED(m)) {
2888 assert(!m->laundry);
2889 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
2890 if (!IP_VALID(memory_manager_default) &&
2891 !m->fictitious && m->dirty && m->object->internal &&
2892 (m->object->purgable == VM_PURGABLE_DENY ||
2893 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2894 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2895 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2896 m->throttled = TRUE;
2897 vm_page_throttled_count++;
2898 } else {
2899 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2900 m->active = TRUE;
2901 if (!m->fictitious)
2902 vm_page_active_count++;
2903 }
2904 m->reference = TRUE;
2905 m->no_cache = FALSE;
2906 }
2907 VM_PAGE_CHECK(m);
2908 }
2909
2910
2911 /*
2912 * vm_page_speculate:
2913 *
2914 * Put the specified page on the speculative list (if appropriate).
2915 *
2916 * The page queues must be locked.
2917 */
2918 void
2919 vm_page_speculate(
2920 vm_page_t m,
2921 boolean_t new)
2922 {
2923 struct vm_speculative_age_q *aq;
2924
2925 VM_PAGE_CHECK(m);
2926 assert(m->object != kernel_object);
2927 assert(m->phys_page != vm_page_guard_addr);
2928 #if DEBUG
2929 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2930 #endif
2931
2932 if (m->absent && !m->unusual)
2933 panic("vm_page_speculate: %p absent", m);
2934
2935 VM_PAGE_QUEUES_REMOVE(m);
2936
2937 if ( !VM_PAGE_WIRED(m)) {
2938 mach_timespec_t ts;
2939 clock_sec_t sec;
2940 clock_nsec_t nsec;
2941
2942 clock_get_system_nanotime(&sec, &nsec);
2943 ts.tv_sec = (unsigned int) sec;
2944 ts.tv_nsec = nsec;
2945
2946 if (vm_page_speculative_count == 0) {
2947
2948 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2949 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2950
2951 aq = &vm_page_queue_speculative[speculative_age_index];
2952
2953 /*
2954 * set the timer to begin a new group
2955 */
2956 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2957 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2958
2959 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2960 } else {
2961 aq = &vm_page_queue_speculative[speculative_age_index];
2962
2963 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
2964
2965 speculative_age_index++;
2966
2967 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2968 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2969 if (speculative_age_index == speculative_steal_index) {
2970 speculative_steal_index = speculative_age_index + 1;
2971
2972 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2973 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2974 }
2975 aq = &vm_page_queue_speculative[speculative_age_index];
2976
2977 if (!queue_empty(&aq->age_q))
2978 vm_page_speculate_ageit(aq);
2979
2980 aq->age_ts.tv_sec = VM_PAGE_SPECULATIVE_Q_AGE_MS / 1000;
2981 aq->age_ts.tv_nsec = (VM_PAGE_SPECULATIVE_Q_AGE_MS % 1000) * 1000 * NSEC_PER_USEC;
2982
2983 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
2984 }
2985 }
2986 enqueue_tail(&aq->age_q, &m->pageq);
2987 m->speculative = TRUE;
2988 vm_page_speculative_count++;
2989
2990 if (new == TRUE) {
2991 m->object->pages_created++;
2992 #if DEVELOPMENT || DEBUG
2993 vm_page_speculative_created++;
2994 #endif
2995 }
2996 }
2997 VM_PAGE_CHECK(m);
2998 }
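/*
 * Worked example of the aging-bin timer above (hypothetical tunable): if
 * VM_PAGE_SPECULATIVE_Q_AGE_MS were 500, a freshly started bin gets
 * age_ts seeded with 500 / 1000 == 0 seconds plus
 * (500 % 1000) * 1000 * NSEC_PER_USEC == 500,000,000 nanoseconds, then
 * ADD_MACH_TIMESPEC folds in the current time, so the bin ages out half
 * a second after its first page is queued.
 */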
2999
3000
3001 /*
3002 * move pages from the specified aging bin to
3003 * the speculative bin that pageout_scan claims from
3004 *
3005 * The page queues must be locked.
3006 */
3007 void
3008 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3009 {
3010 struct vm_speculative_age_q *sq;
3011 vm_page_t t;
3012
3013 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3014
3015 if (queue_empty(&sq->age_q)) {
3016 sq->age_q.next = aq->age_q.next;
3017 sq->age_q.prev = aq->age_q.prev;
3018
3019 t = (vm_page_t)sq->age_q.next;
3020 t->pageq.prev = &sq->age_q;
3021
3022 t = (vm_page_t)sq->age_q.prev;
3023 t->pageq.next = &sq->age_q;
3024 } else {
3025 t = (vm_page_t)sq->age_q.prev;
3026 t->pageq.next = aq->age_q.next;
3027
3028 t = (vm_page_t)aq->age_q.next;
3029 t->pageq.prev = sq->age_q.prev;
3030
3031 t = (vm_page_t)aq->age_q.prev;
3032 t->pageq.next = &sq->age_q;
3033
3034 sq->age_q.prev = aq->age_q.prev;
3035 }
3036 queue_init(&aq->age_q);
3037 }
3038
3039
3040 void
3041 vm_page_lru(
3042 vm_page_t m)
3043 {
3044 VM_PAGE_CHECK(m);
3045 assert(m->object != kernel_object);
3046 assert(m->phys_page != vm_page_guard_addr);
3047
3048 #if DEBUG
3049 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3050 #endif
3051 if (m->active || m->reference)
3052 return;
3053
3054 if (m->private || (VM_PAGE_WIRED(m)))
3055 return;
3056
3057 m->no_cache = FALSE;
3058
3059 VM_PAGE_QUEUES_REMOVE(m);
3060
3061 assert(!m->laundry);
3062 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
3063
3064 queue_enter(&vm_page_queue_inactive, m, vm_page_t, pageq);
3065 m->inactive = TRUE;
3066
3067 vm_page_inactive_count++;
3068 token_new_pagecount++;
3069 }
3070
3071
3072 void
3073 vm_page_reactivate_all_throttled(void)
3074 {
3075 vm_page_t first_throttled, last_throttled;
3076 vm_page_t first_active;
3077 vm_page_t m;
3078 int extra_active_count;
3079
3080 extra_active_count = 0;
3081 vm_page_lock_queues();
3082 if (! queue_empty(&vm_page_queue_throttled)) {
3083 /*
3084 * Switch "throttled" pages to "active".
3085 */
3086 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3087 VM_PAGE_CHECK(m);
3088 assert(m->throttled);
3089 assert(!m->active);
3090 assert(!m->inactive);
3091 assert(!m->speculative);
3092 assert(!VM_PAGE_WIRED(m));
3093 if (!m->fictitious) {
3094 extra_active_count++;
3095 }
3096 m->throttled = FALSE;
3097 m->active = TRUE;
3098 VM_PAGE_CHECK(m);
3099 }
3100
3101 /*
3102 * Transfer the entire throttled queue to the regular LRU page queues.
3103 * We insert it at the head of the active queue, so that these pages
3104 * get re-evaluated by the LRU algorithm first, since they've been
3105 * completely out of it until now.
3106 */
3107 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3108 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3109 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3110 if (queue_empty(&vm_page_queue_active)) {
3111 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3112 } else {
3113 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3114 }
3115 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3116 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3117 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3118
3119 #if DEBUG
3120 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3121 #endif
3122 queue_init(&vm_page_queue_throttled);
3123 /*
3124 * Adjust the global page counts.
3125 */
3126 vm_page_active_count += extra_active_count;
3127 vm_page_throttled_count = 0;
3128 }
3129 assert(vm_page_throttled_count == 0);
3130 assert(queue_empty(&vm_page_queue_throttled));
3131 vm_page_unlock_queues();
3132 }
3133
3134
3135 /*
3136 * move pages from the indicated local queue to the global active queue
3137 * it's OK to fail if we're below the hard limit and force == FALSE
3138 * the nolocks == TRUE case is to allow this function to be run on
3139 * the hibernate path
3140 */
3141
3142 void
3143 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3144 {
3145 struct vpl *lq;
3146 vm_page_t first_local, last_local;
3147 vm_page_t first_active;
3148 vm_page_t m;
3149 uint32_t count = 0;
3150
3151 if (vm_page_local_q == NULL)
3152 return;
3153
3154 lq = &vm_page_local_q[lid].vpl_un.vpl;
3155
3156 if (nolocks == FALSE) {
3157 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3158 if ( !vm_page_trylockspin_queues())
3159 return;
3160 } else
3161 vm_page_lockspin_queues();
3162
3163 VPL_LOCK(&lq->vpl_lock);
3164 }
3165 if (lq->vpl_count) {
3166 /*
3167 * Switch "local" pages to "active".
3168 */
3169 assert(!queue_empty(&lq->vpl_queue));
3170
3171 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3172 VM_PAGE_CHECK(m);
3173 assert(m->local);
3174 assert(!m->active);
3175 assert(!m->inactive);
3176 assert(!m->speculative);
3177 assert(!VM_PAGE_WIRED(m));
3178 assert(!m->throttled);
3179 assert(!m->fictitious);
3180
3181 if (m->local_id != lid)
3182 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3183
3184 m->local_id = 0;
3185 m->local = FALSE;
3186 m->active = TRUE;
3187 VM_PAGE_CHECK(m);
3188
3189 count++;
3190 }
3191 if (count != lq->vpl_count)
3192 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3193
3194 /*
3195 * Transfer the entire local queue to the regular LRU page queues.
3196 */
3197 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3198 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3199 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3200
3201 if (queue_empty(&vm_page_queue_active)) {
3202 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3203 } else {
3204 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3205 }
3206 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3207 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3208 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3209
3210 queue_init(&lq->vpl_queue);
3211 /*
3212 * Adjust the global page counts.
3213 */
3214 vm_page_active_count += lq->vpl_count;
3215 lq->vpl_count = 0;
3216 }
3217 assert(queue_empty(&lq->vpl_queue));
3218
3219 if (nolocks == FALSE) {
3220 VPL_UNLOCK(&lq->vpl_lock);
3221 vm_page_unlock_queues();
3222 }
3223 }
3224
3225 /*
3226 * vm_page_part_zero_fill:
3227 *
3228 * Zero-fill a part of the page.
3229 */
3230 void
3231 vm_page_part_zero_fill(
3232 vm_page_t m,
3233 vm_offset_t m_pa,
3234 vm_size_t len)
3235 {
3236 vm_page_t tmp;
3237
3238 VM_PAGE_CHECK(m);
3239 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3240 pmap_zero_part_page(m->phys_page, m_pa, len);
3241 #else
3242 while (1) {
3243 tmp = vm_page_grab();
3244 if (tmp == VM_PAGE_NULL) {
3245 vm_page_wait(THREAD_UNINT);
3246 continue;
3247 }
3248 break;
3249 }
3250 vm_page_zero_fill(tmp);
3251 if(m_pa != 0) {
3252 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3253 }
3254 if((m_pa + len) < PAGE_SIZE) {
3255 vm_page_part_copy(m, m_pa + len, tmp,
3256 m_pa + len, PAGE_SIZE - (m_pa + len));
3257 }
3258 vm_page_copy(tmp,m);
3259 VM_PAGE_FREE(tmp);
3260 #endif
3261
3262 }
3263
3264 /*
3265 * vm_page_zero_fill:
3266 *
3267 * Zero-fill the specified page.
3268 */
3269 void
3270 vm_page_zero_fill(
3271 vm_page_t m)
3272 {
3273 XPR(XPR_VM_PAGE,
3274 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3275 m->object, m->offset, m, 0,0);
3276
3277 VM_PAGE_CHECK(m);
3278
3279 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3280 pmap_zero_page(m->phys_page);
3281 }
3282
3283 /*
3284 * vm_page_part_copy:
3285 *
3286 * copy part of one page to another
3287 */
3288
3289 void
3290 vm_page_part_copy(
3291 vm_page_t src_m,
3292 vm_offset_t src_pa,
3293 vm_page_t dst_m,
3294 vm_offset_t dst_pa,
3295 vm_size_t len)
3296 {
3297 VM_PAGE_CHECK(src_m);
3298 VM_PAGE_CHECK(dst_m);
3299
3300 pmap_copy_part_page(src_m->phys_page, src_pa,
3301 dst_m->phys_page, dst_pa, len);
3302 }
3303
3304 /*
3305 * vm_page_copy:
3306 *
3307 * Copy one page to another
3308 *
3309 * ENCRYPTED SWAP:
3310 * The source page should not be encrypted. The caller should
3311 * make sure the page is decrypted first, if necessary.
3312 */
3313
3314 int vm_page_copy_cs_validations = 0;
3315 int vm_page_copy_cs_tainted = 0;
3316
3317 void
3318 vm_page_copy(
3319 vm_page_t src_m,
3320 vm_page_t dest_m)
3321 {
3322 XPR(XPR_VM_PAGE,
3323 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3324 src_m->object, src_m->offset,
3325 dest_m->object, dest_m->offset,
3326 0);
3327
3328 VM_PAGE_CHECK(src_m);
3329 VM_PAGE_CHECK(dest_m);
3330
3331 /*
3332 * ENCRYPTED SWAP:
3333 * The source page should not be encrypted at this point.
3334 * The destination page will therefore not contain encrypted
3335 * data after the copy.
3336 */
3337 if (src_m->encrypted) {
3338 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3339 }
3340 dest_m->encrypted = FALSE;
3341
3342 if (src_m->object != VM_OBJECT_NULL &&
3343 src_m->object->code_signed) {
3344 /*
3345 * We're copying a page from a code-signed object.
3346 * Whoever ends up mapping the copy page might care about
3347 * the original page's integrity, so let's validate the
3348 * source page now.
3349 */
3350 vm_page_copy_cs_validations++;
3351 vm_page_validate_cs(src_m);
3352 }
3353 /*
3354 * Propagate the cs_tainted bit to the copy page. Do not propagate
3355 * the cs_validated bit.
3356 */
3357 dest_m->cs_tainted = src_m->cs_tainted;
3358 if (dest_m->cs_tainted) {
3359 vm_page_copy_cs_tainted++;
3360 }
3361
3362 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3363 }
3364
3365 #if MACH_ASSERT
3366 static void
3367 _vm_page_print(
3368 vm_page_t p)
3369 {
3370 printf("vm_page %p: \n", p);
3371 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3372 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3373 printf(" next=%p\n", p->next);
3374 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3375 printf(" wire_count=%u\n", p->wire_count);
3376
3377 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3378 (p->local ? "" : "!"),
3379 (p->inactive ? "" : "!"),
3380 (p->active ? "" : "!"),
3381 (p->pageout_queue ? "" : "!"),
3382 (p->speculative ? "" : "!"),
3383 (p->laundry ? "" : "!"));
3384 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3385 (p->free ? "" : "!"),
3386 (p->reference ? "" : "!"),
3387 (p->gobbled ? "" : "!"),
3388 (p->private ? "" : "!"),
3389 (p->throttled ? "" : "!"));
3390 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3391 (p->busy ? "" : "!"),
3392 (p->wanted ? "" : "!"),
3393 (p->tabled ? "" : "!"),
3394 (p->fictitious ? "" : "!"),
3395 (p->pmapped ? "" : "!"),
3396 (p->wpmapped ? "" : "!"));
3397 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3398 (p->pageout ? "" : "!"),
3399 (p->absent ? "" : "!"),
3400 (p->error ? "" : "!"),
3401 (p->dirty ? "" : "!"),
3402 (p->cleaning ? "" : "!"),
3403 (p->precious ? "" : "!"),
3404 (p->clustered ? "" : "!"));
3405 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3406 (p->overwriting ? "" : "!"),
3407 (p->restart ? "" : "!"),
3408 (p->unusual ? "" : "!"),
3409 (p->encrypted ? "" : "!"),
3410 (p->encrypted_cleaning ? "" : "!"));
3411 printf(" %slist_req_pending, %sdump_cleaning, %scs_validated, %scs_tainted, %sno_cache\n",
3412 (p->list_req_pending ? "" : "!"),
3413 (p->dump_cleaning ? "" : "!"),
3414 (p->cs_validated ? "" : "!"),
3415 (p->cs_tainted ? "" : "!"),
3416 (p->no_cache ? "" : "!"));
3417 printf(" %szero_fill\n",
3418 (p->zero_fill ? "" : "!"));
3419
3420 printf("phys_page=0x%x\n", p->phys_page);
3421 }
3422
3423 /*
3424 * Check that the list of pages is ordered by
3425 * ascending physical address and has no holes.
3426 */
3427 static int
3428 vm_page_verify_contiguous(
3429 vm_page_t pages,
3430 unsigned int npages)
3431 {
3432 register vm_page_t m;
3433 unsigned int page_count;
3434 vm_offset_t prev_addr;
3435
3436 prev_addr = pages->phys_page;
3437 page_count = 1;
3438 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3439 if (m->phys_page != prev_addr + 1) {
3440 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3441 m, (long)prev_addr, m->phys_page);
3442 printf("pages %p page_count %d\n", pages, page_count);
3443 panic("vm_page_verify_contiguous: not contiguous!");
3444 }
3445 prev_addr = m->phys_page;
3446 ++page_count;
3447 }
3448 if (page_count != npages) {
3449 printf("pages %p actual count 0x%x but requested 0x%x\n",
3450 pages, page_count, npages);
3451 panic("vm_page_verify_contiguous: count error");
3452 }
3453 return 1;
3454 }
3455
3456
3457 /*
3458 * Check the free lists for proper length etc.
3459 */
3460 static unsigned int
3461 vm_page_verify_free_list(
3462 queue_head_t *vm_page_queue,
3463 unsigned int color,
3464 vm_page_t look_for_page,
3465 boolean_t expect_page)
3466 {
3467 unsigned int npages;
3468 vm_page_t m;
3469 vm_page_t prev_m;
3470 boolean_t found_page;
3471
3472 found_page = FALSE;
3473 npages = 0;
3474 prev_m = (vm_page_t) vm_page_queue;
3475 queue_iterate(vm_page_queue,
3476 m,
3477 vm_page_t,
3478 pageq) {
3479 if (m == look_for_page) {
3480 found_page = TRUE;
3481 }
3482 if ((vm_page_t) m->pageq.prev != prev_m)
3483 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3484 color, npages, m, m->pageq.prev, prev_m);
3485 if ( ! m->free )
3486 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3487 color, npages, m);
3488 if ( ! m->busy )
3489 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3490 color, npages, m);
3491 if ( color != (unsigned int) -1 && (m->phys_page & vm_color_mask) != color)
3492 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3493 color, npages, m, m->phys_page & vm_color_mask, color);
3494 ++npages;
3495 prev_m = m;
3496 }
3497 if (look_for_page != VM_PAGE_NULL) {
3498 unsigned int other_color;
3499
3500 if (expect_page && !found_page) {
3501 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3502 color, npages, look_for_page, look_for_page->phys_page);
3503 _vm_page_print(look_for_page);
3504 for (other_color = 0;
3505 other_color < vm_colors;
3506 other_color++) {
3507 if (other_color == color)
3508 continue;
3509 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3510 other_color, look_for_page, FALSE);
3511 }
3512 if (color != (unsigned int) -1) {
3513 vm_page_verify_free_list(&vm_lopage_queue_free,
3514 (unsigned int) -1, look_for_page, FALSE);
3515 }
3516
3517 panic("vm_page_verify_free_list(color=%u)\n", color);
3518 }
3519 if (!expect_page && found_page) {
3520 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3521 color, npages, look_for_page, look_for_page->phys_page);
3522 }
3523 }
3524 return npages;
3525 }
3526
3527 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3528 static void
3529 vm_page_verify_free_lists( void )
3530 {
3531 unsigned int color, npages, nlopages;
3532
3533 if (! vm_page_verify_free_lists_enabled)
3534 return;
3535
3536 npages = 0;
3537
3538 lck_mtx_lock(&vm_page_queue_free_lock);
3539
3540 for( color = 0; color < vm_colors; color++ ) {
3541 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3542 color, VM_PAGE_NULL, FALSE);
3543 }
3544
3545 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3546 (unsigned int) -1,
3547 VM_PAGE_NULL, FALSE);
3548 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3549 panic("vm_page_verify_free_lists: "
3550 "npages %u free_count %d nlopages %u lo_free_count %u",
3551 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3552 lck_mtx_unlock(&vm_page_queue_free_lock);
3553 }
3554
3555 void
3556 vm_page_queues_assert(
3557 vm_page_t mem,
3558 int val)
3559 {
3560 if (mem->free + mem->active + mem->inactive + mem->speculative +
3561 mem->throttled + mem->pageout_queue > (val)) {
3562 _vm_page_print(mem);
3563 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3564 }
3565 if (VM_PAGE_WIRED(mem)) {
3566 assert(!mem->active);
3567 assert(!mem->inactive);
3568 assert(!mem->speculative);
3569 assert(!mem->throttled);
3570 }
3571 }
3572 #endif /* MACH_ASSERT */
3573
3574
3575 /*
3576 * CONTIGUOUS PAGE ALLOCATION
3577 *
3578 * Find a region large enough to contain at least n pages
3579 * of contiguous physical memory.
3580 *
3581 * This is done by traversing the vm_page_t array in a linear fashion
3582 * we assume that the vm_page_t array has the available physical pages in an
3583 * ordered, ascending list... this is currently true of all our implementations
3584 * and must remain so... there can be 'holes' in the array... we also can
3585 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
3586 * which used to happen via 'vm_page_convert'... that function was no longer
3587 * being called and has been removed...
3588 *
3589 * The basic flow consists of stabilizing some of the interesting state of
3590 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3591 * sweep at the beginning of the array looking for pages that meet our criteria
3592 * for a 'stealable' page... currently we are pretty conservative... if the page
3593 * meets these criteria and is physically contiguous to the previous page in the 'run',
3594 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3595 * and start to develop a new run... if at this point we've already considered
3596 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3597 * and mutex_pause (which will yield the processor), to keep the latency low with
3598 * respect to other threads trying to acquire free pages (or move pages from q to q),
3599 * and then continue from the spot we left off... we only make 1 pass through the
3600 * array. Once we have a 'run' that is long enough, we'll go into the loop
3601 * which steals the pages from the queues they're currently on... pages on the free
3602 * queue can be stolen directly... pages that are on any of the other queues
3603 * must be removed from the object they are tabled on... this requires taking the
3604 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3605 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3606 * dump the pages we've currently stolen back to the free list, and pick up our
3607 * scan from the point where we aborted the 'current' run.
3608 *
3609 *
3610 * Requirements:
3611 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3612 *
3613 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3614 *
3615 * Algorithm:
3616 */
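/*
 * Illustrative parameters (hypothetical): a caller wanting a 2MB run
 * aligned on a 2MB boundary with 4K pages would pass contig_pages == 512
 * and pnum_mask == 0x1ff, so a run may only begin at a page whose
 * (phys_page & pnum_mask) == 0 and must then grow by physically
 * consecutive pages (phys_page == prevcontaddr + 1).
 */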
3617
3618 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3619
3620
3621 #define RESET_STATE_OF_RUN() \
3622 MACRO_BEGIN \
3623 prevcontaddr = -2; \
3624 start_pnum = -1; \
3625 free_considered = 0; \
3626 substitute_needed = 0; \
3627 npages = 0; \
3628 MACRO_END
3629
3630 /*
3631 * Can we steal in-use (i.e. not free) pages when searching for
3632 * physically-contiguous pages ?
3633 */
3634 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3635
3636 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3637 #if DEBUG
3638 int vm_page_find_contig_debug = 0;
3639 #endif
3640
3641 static vm_page_t
3642 vm_page_find_contiguous(
3643 unsigned int contig_pages,
3644 ppnum_t max_pnum,
3645 ppnum_t pnum_mask,
3646 boolean_t wire,
3647 int flags)
3648 {
3649 vm_page_t m = NULL;
3650 ppnum_t prevcontaddr;
3651 ppnum_t start_pnum;
3652 unsigned int npages, considered, scanned;
3653 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3654 unsigned int idx_last_contig_page_found = 0;
3655 int free_considered, free_available;
3656 int substitute_needed;
3657 boolean_t wrapped;
3658 #if DEBUG
3659 clock_sec_t tv_start_sec, tv_end_sec;
3660 clock_usec_t tv_start_usec, tv_end_usec;
3661 #endif
3662 #if MACH_ASSERT
3663 int yielded = 0;
3664 int dumped_run = 0;
3665 int stolen_pages = 0;
3666 #endif
3667
3668 if (contig_pages == 0)
3669 return VM_PAGE_NULL;
3670
3671 #if MACH_ASSERT
3672 vm_page_verify_free_lists();
3673 #endif
3674 #if DEBUG
3675 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3676 #endif
3677 vm_page_lock_queues();
3678 lck_mtx_lock(&vm_page_queue_free_lock);
3679
3680 RESET_STATE_OF_RUN();
3681
3682 scanned = 0;
3683 considered = 0;
3684 free_available = vm_page_free_count - vm_page_free_reserved;
3685
3686 wrapped = FALSE;
3687
3688 if(flags & KMA_LOMEM)
3689 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3690 else
3691 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3692
3693 orig_last_idx = idx_last_contig_page_found;
3694 last_idx = orig_last_idx;
3695
3696 for (page_idx = last_idx, start_idx = last_idx;
3697 npages < contig_pages && page_idx < vm_pages_count;
3698 page_idx++) {
3699 retry:
3700 if (wrapped &&
3701 npages == 0 &&
3702 page_idx >= orig_last_idx) {
3703 /*
3704 * We're back where we started and we haven't
3705 * found any suitable contiguous range. Let's
3706 * give up.
3707 */
3708 break;
3709 }
3710 scanned++;
3711 m = &vm_pages[page_idx];
3712
3713 assert(!m->fictitious);
3714 assert(!m->private);
3715
3716 if (max_pnum && m->phys_page > max_pnum) {
3717 /* no more low pages... */
3718 break;
3719 }
3720 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3721 /*
3722 * not aligned
3723 */
3724 RESET_STATE_OF_RUN();
3725
3726 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3727 m->encrypted || m->encrypted_cleaning || m->cs_validated || m->cs_tainted ||
3728 m->error || m->absent || m->pageout_queue || m->laundry || m->wanted || m->precious ||
3729 m->cleaning || m->overwriting || m->restart || m->unusual || m->list_req_pending ||
3730 m->pageout) {
3731 /*
3732 * page is in a transient state
3733 * or a state we don't want to deal
3734 * with, so don't consider it which
3735 * means starting a new run
3736 */
3737 RESET_STATE_OF_RUN();
3738
3739 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled) {
3740 /*
3741 * page needs to be on one of our queues
3742 * in order for it to be stable behind the
3743 * locks we hold at this point...
3744 * if not, don't consider it which
3745 * means starting a new run
3746 */
3747 RESET_STATE_OF_RUN();
3748
3749 } else if (!m->free && (!m->tabled || m->busy)) {
3750 /*
3751 * pages on the free list are always 'busy'
3752 * so we couldn't test for 'busy' in the check
3753 * for the transient states... pages that are
3754 * 'free' are never 'tabled', so we also couldn't
3755 * test for 'tabled'. So we check here to make
3756 * sure that a non-free page is not busy and is
3757 * tabled on an object...
3758 * if not, don't consider it which
3759 * means starting a new run
3760 */
3761 RESET_STATE_OF_RUN();
3762
3763 } else {
3764 if (m->phys_page != prevcontaddr + 1) {
3765 if ((m->phys_page & pnum_mask) != 0) {
3766 RESET_STATE_OF_RUN();
3767 goto did_consider;
3768 } else {
3769 npages = 1;
3770 start_idx = page_idx;
3771 start_pnum = m->phys_page;
3772 }
3773 } else {
3774 npages++;
3775 }
3776 prevcontaddr = m->phys_page;
3777
3778 VM_PAGE_CHECK(m);
3779 if (m->free) {
3780 free_considered++;
3781 } else {
3782 /*
3783 * This page is not free.
3784 * If we can't steal used pages,
3785 * we have to give up this run
3786 * and keep looking.
3787 * Otherwise, we might need to
3788 * move the contents of this page
3789 * into a substitute page.
3790 */
3791 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3792 if (m->pmapped || m->dirty) {
3793 substitute_needed++;
3794 }
3795 #else
3796 RESET_STATE_OF_RUN();
3797 #endif
3798 }
3799
3800 if ((free_considered + substitute_needed) > free_available) {
3801 /*
3802 * if we let this run continue
3803 * we will end up dropping the vm_page_free_count
3804 * below the reserve limit... we need to abort
3805 * this run, but we can at least re-consider this
3806 * page... thus the jump back to 'retry'
3807 */
3808 RESET_STATE_OF_RUN();
3809
3810 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
3811 considered++;
3812 goto retry;
3813 }
3814 /*
3815 * free_available == 0
3816 * so can't consider any free pages... if
3817 * we went to retry in this case, we'd
3818 * get stuck looking at the same page
3819 * without making any forward progress.
3820 * We also want to take this path if we've already
3821 * reached our limit that controls the lock latency
3822 */
3823 }
3824 }
3825 did_consider:
3826 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
3827
3828 lck_mtx_unlock(&vm_page_queue_free_lock);
3829 vm_page_unlock_queues();
3830
3831 mutex_pause(0);
3832
3833 vm_page_lock_queues();
3834 lck_mtx_lock(&vm_page_queue_free_lock);
3835
3836 RESET_STATE_OF_RUN();
3837 /*
3838 * reset our free page limit since we
3839 * dropped the lock protecting the vm_page_free_queue
3840 */
3841 free_available = vm_page_free_count - vm_page_free_reserved;
3842 considered = 0;
3843 #if MACH_ASSERT
3844 yielded++;
3845 #endif
3846 goto retry;
3847 }
3848 considered++;
3849 }
3850 m = VM_PAGE_NULL;
3851
3852 if (npages != contig_pages) {
3853 if (!wrapped) {
3854 /*
3855 * We didn't find a contiguous range but we didn't
3856 * start from the very first page.
3857 * Start again from the very first page.
3858 */
3859 RESET_STATE_OF_RUN();
3860 if( flags & KMA_LOMEM)
3861 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
3862 else
3863 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
3864 last_idx = 0;
3865 page_idx = last_idx;
3866 wrapped = TRUE;
3867 goto retry;
3868 }
3869 lck_mtx_unlock(&vm_page_queue_free_lock);
3870 } else {
3871 vm_page_t m1;
3872 vm_page_t m2;
3873 unsigned int cur_idx;
3874 unsigned int tmp_start_idx;
3875 vm_object_t locked_object = VM_OBJECT_NULL;
3876 boolean_t abort_run = FALSE;
3877
3878 assert(page_idx - start_idx == contig_pages);
3879
3880 tmp_start_idx = start_idx;
3881
3882 /*
3883 * first pass through to pull the free pages
3884 * off of the free queue so that in case we
3885 * need substitute pages, we won't grab any
3886 * of the free pages in the run... we'll clear
3887 * the 'free' bit in the 2nd pass, and even in
3888 * an abort_run case, we'll collect all of the
3889 * free pages in this run and return them to the free list
3890 */
3891 while (start_idx < page_idx) {
3892
3893 m1 = &vm_pages[start_idx++];
3894
3895 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3896 assert(m1->free);
3897 #endif
3898
3899 if (m1->free) {
3900 unsigned int color;
3901
3902 color = m1->phys_page & vm_color_mask;
3903 #if MACH_ASSERT
3904 vm_page_verify_free_list(&vm_page_queue_free[color],
3905 color, m1, TRUE);
3906 #endif
3907 queue_remove(&vm_page_queue_free[color],
3908 m1,
3909 vm_page_t,
3910 pageq);
3911 m1->pageq.next = NULL;
3912 m1->pageq.prev = NULL;
3913 #if MACH_ASSERT
3914 vm_page_verify_free_list(&vm_page_queue_free[color],
3915 color, VM_PAGE_NULL, FALSE);
3916 #endif
3917 /*
3918 * Clear the "free" bit so that this page
3919 * does not get considered for another
3920 * concurrent physically-contiguous allocation.
3921 */
3922 m1->free = FALSE;
3923 assert(m1->busy);
3924
3925 vm_page_free_count--;
3926 }
3927 }
3928 /*
3929 * adjust global freelist counts
3930 */
3931 if (vm_page_free_count < vm_page_free_count_minimum)
3932 vm_page_free_count_minimum = vm_page_free_count;
3933
3934 if( flags & KMA_LOMEM)
3935 vm_page_lomem_find_contiguous_last_idx = page_idx;
3936 else
3937 vm_page_find_contiguous_last_idx = page_idx;
3938
3939 /*
3940 * we can drop the free queue lock at this point since
3941 * we've pulled any 'free' candidates off of the list
3942 * we need it dropped so that we can do a vm_page_grab
3943 * when substituting for pmapped/dirty pages
3944 */
3945 lck_mtx_unlock(&vm_page_queue_free_lock);
3946
3947 start_idx = tmp_start_idx;
3948 cur_idx = page_idx - 1;
3949
3950 while (start_idx++ < page_idx) {
3951 /*
3952 * must go through the list from back to front
3953 * so that the page list is created in the
3954 * correct order - low -> high phys addresses
3955 */
3956 m1 = &vm_pages[cur_idx--];
3957
3958 assert(!m1->free);
3959 if (m1->object == VM_OBJECT_NULL) {
3960 /*
3961 * page has already been removed from
3962 * the free list in the 1st pass
3963 */
3964 assert(m1->offset == (vm_object_offset_t) -1);
3965 assert(m1->busy);
3966 assert(!m1->wanted);
3967 assert(!m1->laundry);
3968 } else {
3969 vm_object_t object;
3970
3971 if (abort_run == TRUE)
3972 continue;
3973
3974 object = m1->object;
3975
3976 if (object != locked_object) {
3977 if (locked_object) {
3978 vm_object_unlock(locked_object);
3979 locked_object = VM_OBJECT_NULL;
3980 }
3981 if (vm_object_lock_try(object))
3982 locked_object = object;
3983 }
3984 if (locked_object == VM_OBJECT_NULL ||
3985 (VM_PAGE_WIRED(m1) || m1->gobbled ||
3986 m1->encrypted || m1->encrypted_cleaning || m1->cs_validated || m1->cs_tainted ||
3987 m1->error || m1->absent || m1->pageout_queue || m1->laundry || m1->wanted || m1->precious ||
3988 m1->cleaning || m1->overwriting || m1->restart || m1->unusual || m1->list_req_pending || m1->busy)) {
3989
3990 if (locked_object) {
3991 vm_object_unlock(locked_object);
3992 locked_object = VM_OBJECT_NULL;
3993 }
3994 tmp_start_idx = cur_idx;
3995 abort_run = TRUE;
3996 continue;
3997 }
3998 if (m1->pmapped || m1->dirty) {
3999 int refmod;
4000 vm_object_offset_t offset;
4001
4002 m2 = vm_page_grab();
4003
4004 if (m2 == VM_PAGE_NULL) {
4005 if (locked_object) {
4006 vm_object_unlock(locked_object);
4007 locked_object = VM_OBJECT_NULL;
4008 }
4009 tmp_start_idx = cur_idx;
4010 abort_run = TRUE;
4011 continue;
4012 }
4013 if (m1->pmapped)
4014 refmod = pmap_disconnect(m1->phys_page);
4015 else
4016 refmod = 0;
4017 vm_page_copy(m1, m2);
4018
4019 m2->reference = m1->reference;
4020 m2->dirty = m1->dirty;
4021
4022 if (refmod & VM_MEM_REFERENCED)
4023 m2->reference = TRUE;
4024 if (refmod & VM_MEM_MODIFIED)
4025 m2->dirty = TRUE;
4026 offset = m1->offset;
4027
4028 /*
4029 * completely cleans up the state
4030 * of the page so that it is ready
4031 * to be put onto the free list, or
4032 * for this purpose it looks like it
4033 * just came off of the free list
4034 */
4035 vm_page_free_prepare(m1);
4036
4037 /*
4038 * make sure we clear the ref/mod state
4039 * from the pmap layer... else we risk
4040 * inheriting state from the last time
4041 * this page was used...
4042 */
4043 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4044 /*
4045 * now put the substitute page on the object
4046 */
4047 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE);
4048
4049 if (m2->reference)
4050 vm_page_activate(m2);
4051 else
4052 vm_page_deactivate(m2);
4053
4054 PAGE_WAKEUP_DONE(m2);
4055
4056 } else {
4057 /*
4058 * completely cleans up the state
4059 * of the page so that it is ready
4060 * to be put onto the free list, or
4061 * for this purpose it looks like it
4062 * just came off of the free list
4063 */
4064 vm_page_free_prepare(m1);
4065 }
4066 #if MACH_ASSERT
4067 stolen_pages++;
4068 #endif
4069 }
4070 m1->pageq.next = (queue_entry_t) m;
4071 m1->pageq.prev = NULL;
4072 m = m1;
4073 }
4074 if (locked_object) {
4075 vm_object_unlock(locked_object);
4076 locked_object = VM_OBJECT_NULL;
4077 }
4078
4079 if (abort_run == TRUE) {
4080 if (m != VM_PAGE_NULL) {
4081 vm_page_free_list(m, FALSE);
4082 }
4083 #if MACH_ASSERT
4084 dumped_run++;
4085 #endif
4086 /*
4087 * want the index of the last
4088 * page in this run that was
4089 * successfully 'stolen', so back
4090 * it up 1 for the auto-decrement on use
4091 * and 1 more to bump back over this page
4092 */
4093 page_idx = tmp_start_idx + 2;
4094 if (page_idx >= vm_pages_count) {
4095 if (wrapped)
4096 goto done_scanning;
4097 page_idx = last_idx = 0;
4098 wrapped = TRUE;
4099 }
4100 abort_run = FALSE;
4101
4102 /*
4103 * We had to abort this run, so resume the scan just
4104 * past the page that caused the abort (wrapping to
4105 * the first page if we ran off the end of vm_pages[]).
4106 */
4107 RESET_STATE_OF_RUN();
4108
4109 if( flags & KMA_LOMEM)
4110 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4111 else
4112 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4113
4114 last_idx = page_idx;
4115
4116 lck_mtx_lock(&vm_page_queue_free_lock);
4117 /*
4118 * reset our free page limit since we
4119 * dropped the lock protecting the vm_page_free_queue
4120 */
4121 free_available = vm_page_free_count - vm_page_free_reserved;
4122 goto retry;
4123 }
4124
4125 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4126
4127 if (wire == TRUE)
4128 m1->wire_count++;
4129 else
4130 m1->gobbled = TRUE;
4131 }
4132 if (wire == FALSE)
4133 vm_page_gobble_count += npages;
4134
4135 /*
4136 * gobbled pages are also counted as wired pages
4137 */
4138 vm_page_wire_count += npages;
4139
4140 assert(vm_page_verify_contiguous(m, npages));
4141 }
4142 done_scanning:
4143 vm_page_unlock_queues();
4144
4145 #if DEBUG
4146 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4147
4148 tv_end_sec -= tv_start_sec;
4149 if (tv_end_usec < tv_start_usec) {
4150 tv_end_sec--;
4151 tv_end_usec += 1000000;
4152 }
4153 tv_end_usec -= tv_start_usec;
4154 if (tv_end_usec >= 1000000) {
4155 tv_end_sec++;
4156 tv_end_usec -= 1000000;
4157 }
4158 if (vm_page_find_contig_debug) {
4159 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
4160 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4161 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4162 scanned, yielded, dumped_run, stolen_pages);
4163 }
4164
4165 #endif
4166 #if MACH_ASSERT
4167 vm_page_verify_free_lists();
4168 #endif
4169 return m;
4170 }
4171
4172 /*
4173 * Allocate a list of contiguous, wired pages.
4174 */
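/*
 * A rough usage sketch (argument values are illustrative; max_pnum is
 * assumed here to be an upper bound on acceptable physical page numbers).
 * To request 16 wired, physically contiguous pages with no alignment
 * constraint (pnum_mask of 0):
 *
 *	vm_page_t chain;
 *	kern_return_t kr;
 *
 *	kr = cpm_allocate((vm_size_t)16 * PAGE_SIZE, &chain,
 *			  max_pnum, 0, TRUE, 0);
 *
 * On KERN_SUCCESS, 'chain' is linked through pageq.next in ascending
 * physical-address order.
 */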
4175 kern_return_t
4176 cpm_allocate(
4177 vm_size_t size,
4178 vm_page_t *list,
4179 ppnum_t max_pnum,
4180 ppnum_t pnum_mask,
4181 boolean_t wire,
4182 int flags)
4183 {
4184 vm_page_t pages;
4185 unsigned int npages;
4186
4187 if (size % page_size != 0)
4188 return KERN_INVALID_ARGUMENT;
4189
4190 npages = (unsigned int) (size / PAGE_SIZE);
4191 if (npages != size / PAGE_SIZE) {
4192 /* 32-bit overflow */
4193 return KERN_INVALID_ARGUMENT;
4194 }
4195
4196 /*
4197 * Obtain a pointer to a subset of the free
4198 * list large enough to satisfy the request;
4199 * the region will be physically contiguous.
4200 */
4201 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4202
4203 if (pages == VM_PAGE_NULL)
4204 return KERN_NO_SPACE;
4205 /*
4206 * determine need for wakeups
4207 */
4208 if ((vm_page_free_count < vm_page_free_min) ||
4209 ((vm_page_free_count < vm_page_free_target) &&
4210 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4211 thread_wakeup((event_t) &vm_page_free_wanted);
4212
4213 #if CONFIG_EMBEDDED
4214 {
4215 int percent_avail;
4216
4217 /*
4218 * Decide if we need to poke the memorystatus notification thread.
4219 */
4220 percent_avail =
4221 (vm_page_active_count + vm_page_inactive_count +
4222 vm_page_speculative_count + vm_page_free_count +
4223 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
4224 atop_64(max_mem);
4225 if (percent_avail <= (kern_memorystatus_level - 5)) {
4226 kern_memorystatus_level = percent_avail;
4227 thread_wakeup((event_t)&kern_memorystatus_wakeup);
4228 }
4229 }
4230 #endif
4231 /*
4232 * The CPM pages should now be available and
4233 * ordered by ascending physical address.
4234 */
4235 assert(vm_page_verify_contiguous(pages, npages));
4236
4237 *list = pages;
4238 return KERN_SUCCESS;
4239 }
4240
4241
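/*
 * Allocate 'page_count' pages from the low-memory pool (callers must
 * pass KMA_LOMEM) and return them chained through pageq.next.  If the
 * pool runs dry, any partially built list is freed and
 * KERN_RESOURCE_SHORTAGE is returned.
 */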
4242 kern_return_t
4243 vm_page_alloc_list(
4244 int page_count,
4245 int flags,
4246 vm_page_t *list)
4247 {
4248 vm_page_t lo_page_list = VM_PAGE_NULL;
4249 vm_page_t mem;
4250 int i;
4251
4252 if ( !(flags & KMA_LOMEM))
4253 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4254
4255 for (i = 0; i < page_count; i++) {
4256
4257 mem = vm_page_grablo();
4258
4259 if (mem == VM_PAGE_NULL) {
4260 if (lo_page_list)
4261 vm_page_free_list(lo_page_list, FALSE);
4262
4263 *list = VM_PAGE_NULL;
4264
4265 return (KERN_RESOURCE_SHORTAGE);
4266 }
4267 mem->pageq.next = (queue_entry_t) lo_page_list;
4268 lo_page_list = mem;
4269 }
4270 *list = lo_page_list;
4271
4272 return (KERN_SUCCESS);
4273 }
4274
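/*
 * Simple accessors for vm_page fields; presumably provided so that
 * callers outside the VM can treat vm_page_t as an opaque type.
 */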
4275 void
4276 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4277 {
4278 page->offset = offset;
4279 }
4280
4281 vm_page_t
4282 vm_page_get_next(vm_page_t page)
4283 {
4284 return ((vm_page_t) page->pageq.next);
4285 }
4286
4287 vm_object_offset_t
4288 vm_page_get_offset(vm_page_t page)
4289 {
4290 return (page->offset);
4291 }
4292
4293 ppnum_t
4294 vm_page_get_phys_page(vm_page_t page)
4295 {
4296 return (page->phys_page);
4297 }
4298
4299
4300 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4301
4302 #if HIBERNATION
4303
4304 static vm_page_t hibernate_gobble_queue;
4305
4306 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4307
4308 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4309 static int hibernate_flush_dirty_pages(void);
4310 static int hibernate_flush_queue(queue_head_t *, int);
4311 static void hibernate_dirty_page(vm_page_t);
4312
4313 void hibernate_flush_wait(void);
4314 void hibernate_mark_in_progress(void);
4315 void hibernate_clear_in_progress(void);
4316
4317
4318 struct hibernate_statistics {
4319 int hibernate_considered;
4320 int hibernate_reentered_on_q;
4321 int hibernate_found_dirty;
4322 int hibernate_skipped_cleaning;
4323 int hibernate_skipped_transient;
4324 int hibernate_skipped_precious;
4325 int hibernate_queue_nolock;
4326 int hibernate_queue_paused;
4327 int hibernate_throttled;
4328 int hibernate_throttle_timeout;
4329 int hibernate_drained;
4330 int hibernate_drain_timeout;
4331 int cd_lock_failed;
4332 int cd_found_precious;
4333 int cd_found_wired;
4334 int cd_found_busy;
4335 int cd_found_unusual;
4336 int cd_found_cleaning;
4337 int cd_found_laundry;
4338 int cd_found_dirty;
4339 int cd_local_free;
4340 int cd_total_free;
4341 int cd_vm_page_wire_count;
4342 int cd_pages;
4343 int cd_discarded;
4344 int cd_count_wire;
4345 } hibernate_stats;
4346
4347
4348
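/*
 * Block until the given pageout queue's laundry count drains to zero,
 * sleeping in 5-second timed waits; returns 0 once the queue is empty,
 * or 1 if a wait times out.
 */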
4349 static int
4350 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4351 {
4352 wait_result_t wait_result;
4353
4354 vm_page_lock_queues();
4355
4356 while (q->pgo_laundry) {
4357
4358 q->pgo_draining = TRUE;
4359
4360 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4361
4362 vm_page_unlock_queues();
4363
4364 wait_result = thread_block(THREAD_CONTINUE_NULL);
4365
4366 if (wait_result == THREAD_TIMED_OUT) {
4367 hibernate_stats.hibernate_drain_timeout++;
4368 return (1);
4369 }
4370 vm_page_lock_queues();
4371
4372 hibernate_stats.hibernate_drained++;
4373 }
4374 vm_page_unlock_queues();
4375
4376 return (0);
4377 }
4378
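/*
 * Hand a dirty page to the appropriate pageout queue (internal or
 * external) so that it gets cleaned before the hibernation image is
 * written.  The caller must hold the page queues lock and have the
 * page's object locked exclusively (asserted below).
 */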
4379 static void
4380 hibernate_dirty_page(vm_page_t m)
4381 {
4382 vm_object_t object = m->object;
4383 struct vm_pageout_queue *q;
4384
4385 #if DEBUG
4386 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4387 #endif
4388 vm_object_lock_assert_exclusive(object);
4389
4390 /*
4391 * protect the object from collapse -
4392 * locking in the object's paging_offset.
4393 */
4394 vm_object_paging_begin(object);
4395
4396 m->list_req_pending = TRUE;
4397 m->cleaning = TRUE;
4398 m->busy = TRUE;
4399
4400 if (object->internal == TRUE)
4401 q = &vm_pageout_queue_internal;
4402 else
4403 q = &vm_pageout_queue_external;
4404
4405 /*
4406 * pgo_laundry count is tied to the laundry bit
4407 */
4408 m->laundry = TRUE;
4409 q->pgo_laundry++;
4410
4411 m->pageout_queue = TRUE;
4412 queue_enter(&q->pgo_pending, m, vm_page_t, pageq);
4413
4414 if (q->pgo_idle == TRUE) {
4415 q->pgo_idle = FALSE;
4416 thread_wakeup((event_t) &q->pgo_pending);
4417 }
4418 }
4419
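/*
 * Examine up to 'qcount' pages starting at the head of 'q'.  Pages found
 * dirty (already marked dirty, or modified per their pmap ref/mod state)
 * are pulled off their queue and handed to hibernate_dirty_page(); pages
 * that can't be cleaned right now are re-entered on the queue.  The walk
 * throttles itself against the pageout queues and periodically checks
 * whether hibernation should be aborted; a nonzero return means abort.
 */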
4420 static int
4421 hibernate_flush_queue(queue_head_t *q, int qcount)
4422 {
4423 vm_page_t m;
4424 vm_object_t l_object = NULL;
4425 vm_object_t m_object = NULL;
4426 int refmod_state = 0;
4427 int try_failed_count = 0;
4428 int retval = 0;
4429 int current_run = 0;
4430 struct vm_pageout_queue *iq;
4431 struct vm_pageout_queue *eq;
4432 struct vm_pageout_queue *tq;
4433
4434
4435 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4436
4437 iq = &vm_pageout_queue_internal;
4438 eq = &vm_pageout_queue_external;
4439
4440 vm_page_lock_queues();
4441
4442 while (qcount && !queue_empty(q)) {
4443
4444 if (current_run++ == 1000) {
4445 if (hibernate_should_abort()) {
4446 retval = 1;
4447 break;
4448 }
4449 current_run = 0;
4450 }
4451
4452 m = (vm_page_t) queue_first(q);
4453 m_object = m->object;
4454
4455 /*
4456 * check to see if we currently are working
4457 * with the same object... if so, we've
4458 * already got the lock
4459 */
4460 if (m_object != l_object) {
4461 /*
4462 * the object associated with candidate page is
4463 * different from the one we were just working
4464 * with... dump the lock if we still own it
4465 */
4466 if (l_object != NULL) {
4467 vm_object_unlock(l_object);
4468 l_object = NULL;
4469 }
4470 /*
4471 * Try to lock object; since we've already got the
4472 * page queues lock, we can only 'try' for this one.
4473 * if the 'try' fails, we need to do a mutex_pause
4474 * to allow the owner of the object lock a chance to
4475 * run...
4476 */
4477 if ( !vm_object_lock_try_scan(m_object)) {
4478
4479 if (try_failed_count > 20) {
4480 hibernate_stats.hibernate_queue_nolock++;
4481
4482 goto reenter_pg_on_q;
4483 }
4484 vm_pageout_scan_wants_object = m_object;
4485
4486 vm_page_unlock_queues();
4487 mutex_pause(try_failed_count++);
4488 vm_page_lock_queues();
4489
4490 hibernate_stats.hibernate_queue_paused++;
4491 continue;
4492 } else {
4493 l_object = m_object;
4494 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4495 }
4496 }
4497 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->busy || m->absent || m->error) {
4498 /*
4499 * page is not to be cleaned
4500 * put it back at the tail of its queue
4501 */
4502 if (m->cleaning)
4503 hibernate_stats.hibernate_skipped_cleaning++;
4504 else
4505 hibernate_stats.hibernate_skipped_transient++;
4506
4507 goto reenter_pg_on_q;
4508 }
4509 if ( !m_object->pager_initialized && m_object->pager_created)
4510 goto reenter_pg_on_q;
4511
4512 if (m_object->copy == VM_OBJECT_NULL) {
4513 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4514 /*
4515 * let the normal hibernate image path
4516 * deal with these
4517 */
4518 goto reenter_pg_on_q;
4519 }
4520 }
4521 if ( !m->dirty && m->pmapped) {
4522 refmod_state = pmap_get_refmod(m->phys_page);
4523
4524 if ((refmod_state & VM_MEM_MODIFIED))
4525 m->dirty = TRUE;
4526 } else
4527 refmod_state = 0;
4528
4529 if ( !m->dirty) {
4530 /*
4531 * page is not to be cleaned
4532 * put it back at the tail of its queue
4533 */
4534 if (m->precious)
4535 hibernate_stats.hibernate_skipped_precious++;
4536
4537 goto reenter_pg_on_q;
4538 }
4539 tq = NULL;
4540
4541 if (m_object->internal) {
4542 if (VM_PAGE_Q_THROTTLED(iq))
4543 tq = iq;
4544 } else if (VM_PAGE_Q_THROTTLED(eq))
4545 tq = eq;
4546
4547 if (tq != NULL) {
4548 wait_result_t wait_result;
4549 int wait_count = 5;
4550
4551 if (l_object != NULL) {
4552 vm_object_unlock(l_object);
4553 l_object = NULL;
4554 }
4555 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4556
4557 tq->pgo_throttled = TRUE;
4558
4559 while (retval == 0) {
4560
4561 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4562
4563 vm_page_unlock_queues();
4564
4565 wait_result = thread_block(THREAD_CONTINUE_NULL);
4566
4567 vm_page_lock_queues();
4568
4569 if (hibernate_should_abort())
4570 retval = 1;
4571
4572 if (wait_result != THREAD_TIMED_OUT)
4573 break;
4574
4575 if (--wait_count == 0) {
4576 hibernate_stats.hibernate_throttle_timeout++;
4577 retval = 1;
4578 }
4579 }
4580 if (retval)
4581 break;
4582
4583 hibernate_stats.hibernate_throttled++;
4584
4585 continue;
4586 }
4587 VM_PAGE_QUEUES_REMOVE(m);
4588
4589 hibernate_dirty_page(m);
4590
4591 hibernate_stats.hibernate_found_dirty++;
4592
4593 goto next_pg;
4594
4595 reenter_pg_on_q:
4596 queue_remove(q, m, vm_page_t, pageq);
4597 queue_enter(q, m, vm_page_t, pageq);
4598
4599 hibernate_stats.hibernate_reentered_on_q++;
4600 next_pg:
4601 hibernate_stats.hibernate_considered++;
4602
4603 qcount--;
4604 try_failed_count = 0;
4605 }
4606 if (l_object != NULL) {
4607 vm_object_unlock(l_object);
4608 l_object = NULL;
4609 }
4610 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4611
4612 vm_page_unlock_queues();
4613
4614 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
4615
4616 return (retval);
4617 }
4618
4619
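/*
 * Push all dirty pageable memory toward backing store: reactivate the
 * per-cpu local queues, flush the speculative, active, inactive and
 * zero-fill queues, then drain both pageout queues.  Returns nonzero
 * if any step indicates hibernation should be aborted.
 */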
4620 static int
4621 hibernate_flush_dirty_pages()
4622 {
4623 struct vm_speculative_age_q *aq;
4624 uint32_t i;
4625
4626 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
4627
4628 if (vm_page_local_q) {
4629 for (i = 0; i < vm_page_local_q_count; i++)
4630 vm_page_reactivate_local(i, TRUE, FALSE);
4631 }
4632
4633 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
4634 int qcount;
4635 vm_page_t m;
4636
4637 aq = &vm_page_queue_speculative[i];
4638
4639 if (queue_empty(&aq->age_q))
4640 continue;
4641 qcount = 0;
4642
4643 vm_page_lockspin_queues();
4644
4645 queue_iterate(&aq->age_q,
4646 m,
4647 vm_page_t,
4648 pageq)
4649 {
4650 qcount++;
4651 }
4652 vm_page_unlock_queues();
4653
4654 if (qcount) {
4655 if (hibernate_flush_queue(&aq->age_q, qcount))
4656 return (1);
4657 }
4658 }
4659 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count))
4660 return (1);
4661 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_zf_queue_count))
4662 return (1);
4663 if (hibernate_flush_queue(&vm_page_queue_zf, vm_zf_queue_count))
4664 return (1);
4665
4666 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
4667 return (1);
4668 return (hibernate_drain_pageout_queue(&vm_pageout_queue_external));
4669 }
4670
4671
4672 extern void IOSleep(unsigned int);
4673 extern int sync_internal(void);
4674
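/*
 * Top-level flush performed before the hibernation image is written:
 * sleep two seconds, flush dirty pageable memory, and on success sync
 * the buffer cache, ask it to shed pages, and run a zone GC to trim
 * wired memory.  A nonzero return indicates the flush was aborted.
 */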
4675 int
4676 hibernate_flush_memory()
4677 {
4678 int retval;
4679
4680 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
4681
4682 IOSleep(2 * 1000);
4683
4684 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_NONE, vm_page_free_count, 0, 0, 0, 0);
4685
4686 if ((retval = hibernate_flush_dirty_pages()) == 0) {
4687 if (consider_buffer_cache_collect != NULL) {
4688
4689 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, vm_page_wire_count, 0, 0, 0, 0);
4690
4691 sync_internal();
4692 (void)(*consider_buffer_cache_collect)(1);
4693 consider_zone_gc(1);
4694
4695 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, vm_page_wire_count, 0, 0, 0, 0);
4696 }
4697 }
4698 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
4699
4700 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
4701 hibernate_stats.hibernate_considered,
4702 hibernate_stats.hibernate_reentered_on_q,
4703 hibernate_stats.hibernate_found_dirty);
4704 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) queue_nolock(%d)\n",
4705 hibernate_stats.hibernate_skipped_cleaning,
4706 hibernate_stats.hibernate_skipped_transient,
4707 hibernate_stats.hibernate_skipped_precious,
4708 hibernate_stats.hibernate_queue_nolock);
4709 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
4710 hibernate_stats.hibernate_queue_paused,
4711 hibernate_stats.hibernate_throttled,
4712 hibernate_stats.hibernate_throttle_timeout,
4713 hibernate_stats.hibernate_drained,
4714 hibernate_stats.hibernate_drain_timeout);
4715
4716 return (retval);
4717 }
4718
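/*
 * Clear every bank bitmap in 'list'; the bits past each bank's last
 * page are then set so those positions are never treated as pages
 * that need to be saved.
 */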
4719 static void
4720 hibernate_page_list_zero(hibernate_page_list_t *list)
4721 {
4722 uint32_t bank;
4723 hibernate_bitmap_t * bitmap;
4724
4725 bitmap = &list->bank_bitmap[0];
4726 for (bank = 0; bank < list->bank_count; bank++)
4727 {
4728 uint32_t last_bit;
4729
4730 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
4731 // set out-of-bound bits at end of bitmap.
4732 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
4733 if (last_bit)
4734 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
4735
4736 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
4737 }
4738 }
4739
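/*
 * Grab up to 'gobble_count' pages and park them on hibernate_gobble_queue,
 * waiting up to 'free_page_time' milliseconds for free pages to show up.
 * hibernate_page_list_setall() later marks these pages as not needing to
 * be saved.
 */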
4740 void
4741 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
4742 {
4743 uint32_t i;
4744 vm_page_t m;
4745 uint64_t start, end, timeout, nsec;
4746 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
4747 clock_get_uptime(&start);
4748
4749 for (i = 0; i < gobble_count; i++)
4750 {
4751 while (VM_PAGE_NULL == (m = vm_page_grab()))
4752 {
4753 clock_get_uptime(&end);
4754 if (end >= timeout)
4755 break;
4756 VM_PAGE_WAIT();
4757 }
4758 if (!m)
4759 break;
4760 m->busy = FALSE;
4761 vm_page_gobble(m);
4762
4763 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
4764 hibernate_gobble_queue = m;
4765 }
4766
4767 clock_get_uptime(&end);
4768 absolutetime_to_nanoseconds(end - start, &nsec);
4769 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
4770 }
4771
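/*
 * Return every page parked on hibernate_gobble_queue to the free list.
 */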
4772 void
4773 hibernate_free_gobble_pages(void)
4774 {
4775 vm_page_t m, next;
4776 uint32_t count = 0;
4777
4778 m = (vm_page_t) hibernate_gobble_queue;
4779 while(m)
4780 {
4781 next = (vm_page_t) m->pageq.next;
4782 vm_page_free(m);
4783 count++;
4784 m = next;
4785 }
4786 hibernate_gobble_queue = VM_PAGE_NULL;
4787
4788 if (count)
4789 HIBLOG("Freed %d pages\n", count);
4790 }
4791
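/*
 * Decide whether this page can be left out of the hibernation image and
 * discarded on wakeup: wired, precious, busy, unusual, cleaning and
 * laundry pages are always kept; otherwise the page is discardable if
 * it is clean or belongs to a volatile/empty purgeable object.
 */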
4792 static boolean_t
4793 hibernate_consider_discard(vm_page_t m)
4794 {
4795 vm_object_t object = NULL;
4796 int refmod_state;
4797 boolean_t discard = FALSE;
4798
4799 do
4800 {
4801 if (m->private)
4802 panic("hibernate_consider_discard: private");
4803
4804 if (!vm_object_lock_try(m->object)) {
4805 hibernate_stats.cd_lock_failed++;
4806 break;
4807 }
4808 object = m->object;
4809
4810 if (VM_PAGE_WIRED(m)) {
4811 hibernate_stats.cd_found_wired++;
4812 break;
4813 }
4814 if (m->precious) {
4815 hibernate_stats.cd_found_precious++;
4816 break;
4817 }
4818 if (m->busy || !object->alive) {
4819 /*
4820 * Somebody is playing with this page.
4821 */
4822 hibernate_stats.cd_found_busy++;
4823 break;
4824 }
4825 if (m->absent || m->unusual || m->error) {
4826 /*
4827 * If it's unusual in any way, ignore it
4828 */
4829 hibernate_stats.cd_found_unusual++;
4830 break;
4831 }
4832 if (m->cleaning) {
4833 hibernate_stats.cd_found_cleaning++;
4834 break;
4835 }
4836 if (m->laundry || m->list_req_pending) {
4837 hibernate_stats.cd_found_laundry++;
4838 break;
4839 }
4840 if (!m->dirty)
4841 {
4842 refmod_state = pmap_get_refmod(m->phys_page);
4843
4844 if (refmod_state & VM_MEM_REFERENCED)
4845 m->reference = TRUE;
4846 if (refmod_state & VM_MEM_MODIFIED)
4847 m->dirty = TRUE;
4848 }
4849
4850 /*
4851 * If it's clean or purgeable we can discard the page on wakeup.
4852 */
4853 discard = (!m->dirty)
4854 || (VM_PURGABLE_VOLATILE == object->purgable)
4855 || (VM_PURGABLE_EMPTY == object->purgable);
4856
4857 if (discard == FALSE)
4858 hibernate_stats.cd_found_dirty++;
4859 }
4860 while (FALSE);
4861
4862 if (object)
4863 vm_object_unlock(object);
4864
4865 return (discard);
4866 }
4867
4868
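/*
 * Toss a page that hibernate_consider_discard() approved: disconnect
 * any pmap mappings, empty its purgeable object if that object was
 * volatile, and free the page.
 */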
4869 static void
4870 hibernate_discard_page(vm_page_t m)
4871 {
4872 if (m->absent || m->unusual || m->error)
4873 /*
4874 * If it's unusual in any way, ignore it
4875 */
4876 return;
4877
4878 if (m->pmapped == TRUE)
4879 {
4880 __unused int refmod_state = pmap_disconnect(m->phys_page);
4881 }
4882
4883 if (m->laundry)
4884 panic("hibernate_discard_page(%p) laundry", m);
4885 if (m->private)
4886 panic("hibernate_discard_page(%p) private", m);
4887 if (m->fictitious)
4888 panic("hibernate_discard_page(%p) fictitious", m);
4889
4890 if (VM_PURGABLE_VOLATILE == m->object->purgable)
4891 {
4892 /* object should be on a queue */
4893 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
4894 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
4895 assert(old_queue);
4896 /* No need to lock page queue for token delete, hibernate_vm_unlock()
4897 makes sure these locks are uncontended before sleep */
4898 vm_purgeable_token_delete_first(old_queue);
4899 m->object->purgable = VM_PURGABLE_EMPTY;
4900 }
4901
4902 vm_page_free(m);
4903 }
4904
4905 /*
4906 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
4907 pages known to VM to not need saving are subtracted.
4908 Wired pages to be saved are present in page_list_wired, pageable in page_list.
4909 */
4910
4911 void
4912 hibernate_page_list_setall(hibernate_page_list_t * page_list,
4913 hibernate_page_list_t * page_list_wired,
4914 uint32_t * pagesOut)
4915 {
4916 uint64_t start, end, nsec;
4917 vm_page_t m;
4918 uint32_t pages = page_list->page_count;
4919 uint32_t count_zf = 0, count_throttled = 0;
4920 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0;
4921 uint32_t count_wire = pages;
4922 uint32_t count_discard_active = 0;
4923 uint32_t count_discard_inactive = 0;
4924 uint32_t count_discard_purgeable = 0;
4925 uint32_t count_discard_speculative = 0;
4926 uint32_t i;
4927 uint32_t bank;
4928 hibernate_bitmap_t * bitmap;
4929 hibernate_bitmap_t * bitmap_wired;
4930
4931
4932 HIBLOG("hibernate_page_list_setall start %p, %p\n", page_list, page_list_wired);
4933
4934 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
4935
4936 clock_get_uptime(&start);
4937
4938 hibernate_page_list_zero(page_list);
4939 hibernate_page_list_zero(page_list_wired);
4940
4941 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
4942 hibernate_stats.cd_pages = pages;
4943
4944 if (vm_page_local_q) {
4945 for (i = 0; i < vm_page_local_q_count; i++)
4946 vm_page_reactivate_local(i, TRUE, TRUE);
4947 }
4948
4949 m = (vm_page_t) hibernate_gobble_queue;
4950 while(m)
4951 {
4952 pages--;
4953 count_wire--;
4954 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4955 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4956 m = (vm_page_t) m->pageq.next;
4957 }
4958 #ifndef PPC
4959 for( i = 0; i < real_ncpus; i++ )
4960 {
4961 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
4962 {
4963 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
4964 {
4965 pages--;
4966 count_wire--;
4967 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4968 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4969
4970 hibernate_stats.cd_local_free++;
4971 hibernate_stats.cd_total_free++;
4972 }
4973 }
4974 }
4975 #endif
4976 for( i = 0; i < vm_colors; i++ )
4977 {
4978 queue_iterate(&vm_page_queue_free[i],
4979 m,
4980 vm_page_t,
4981 pageq)
4982 {
4983 pages--;
4984 count_wire--;
4985 hibernate_page_bitset(page_list, TRUE, m->phys_page);
4986 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
4987
4988 hibernate_stats.cd_total_free++;
4989 }
4990 }
4991
4992 queue_iterate(&vm_lopage_queue_free,
4993 m,
4994 vm_page_t,
4995 pageq)
4996 {
4997 pages--;
4998 count_wire--;
4999 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5000 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5001
5002 hibernate_stats.cd_total_free++;
5003 }
5004
5005 queue_iterate( &vm_page_queue_throttled,
5006 m,
5007 vm_page_t,
5008 pageq )
5009 {
5010 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5011 && hibernate_consider_discard(m))
5012 {
5013 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5014 count_discard_inactive++;
5015 }
5016 else
5017 count_throttled++;
5018 count_wire--;
5019 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5020 }
5021
5022 queue_iterate( &vm_page_queue_zf,
5023 m,
5024 vm_page_t,
5025 pageq )
5026 {
5027 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5028 && hibernate_consider_discard(m))
5029 {
5030 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5031 if (m->dirty)
5032 count_discard_purgeable++;
5033 else
5034 count_discard_inactive++;
5035 }
5036 else
5037 count_zf++;
5038 count_wire--;
5039 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5040 }
5041
5042 queue_iterate( &vm_page_queue_inactive,
5043 m,
5044 vm_page_t,
5045 pageq )
5046 {
5047 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5048 && hibernate_consider_discard(m))
5049 {
5050 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5051 if (m->dirty)
5052 count_discard_purgeable++;
5053 else
5054 count_discard_inactive++;
5055 }
5056 else
5057 count_inactive++;
5058 count_wire--;
5059 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5060 }
5061
5062 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5063 {
5064 queue_iterate(&vm_page_queue_speculative[i].age_q,
5065 m,
5066 vm_page_t,
5067 pageq)
5068 {
5069 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5070 && hibernate_consider_discard(m))
5071 {
5072 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5073 count_discard_speculative++;
5074 }
5075 else
5076 count_speculative++;
5077 count_wire--;
5078 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5079 }
5080 }
5081
5082 queue_iterate( &vm_page_queue_active,
5083 m,
5084 vm_page_t,
5085 pageq )
5086 {
5087 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5088 && hibernate_consider_discard(m))
5089 {
5090 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5091 if (m->dirty)
5092 count_discard_purgeable++;
5093 else
5094 count_discard_active++;
5095 }
5096 else
5097 count_active++;
5098 count_wire--;
5099 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5100 }
5101
5102 // pull wired from hibernate_bitmap
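// a set bit means "this page need not be saved with this list"; OR-ing in
// the complement of the wired bitmap drops the wired-save pages from the
// pageable page_list, leaving them to be captured via page_list_wired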
5103
5104 bitmap = &page_list->bank_bitmap[0];
5105 bitmap_wired = &page_list_wired->bank_bitmap[0];
5106 for (bank = 0; bank < page_list->bank_count; bank++)
5107 {
5108 for (i = 0; i < bitmap->bitmapwords; i++)
5109 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5110 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5111 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5112 }
5113
5114 // machine dependent adjustments
5115 hibernate_page_list_setall_machine(page_list, page_list_wired, &pages);
5116
5117 hibernate_stats.cd_count_wire = count_wire;
5118 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + count_discard_speculative;
5119
5120 clock_get_uptime(&end);
5121 absolutetime_to_nanoseconds(end - start, &nsec);
5122 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5123
5124 HIBLOG("pages %d, wire %d, act %d, inact %d, spec %d, zf %d, throt %d, could discard act %d inact %d purgeable %d spec %d\n",
5125 pages, count_wire, count_active, count_inactive, count_speculative, count_zf, count_throttled,
5126 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5127
5128 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative;
5129
5130 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5131 }
5132
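/*
 * Free every page whose bit hibernate_page_list_setall() set in
 * 'page_list' on the zero-fill, speculative, inactive and active
 * queues, and log how many of each kind were discarded.
 */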
5133 void
5134 hibernate_page_list_discard(hibernate_page_list_t * page_list)
5135 {
5136 uint64_t start, end, nsec;
5137 vm_page_t m;
5138 vm_page_t next;
5139 uint32_t i;
5140 uint32_t count_discard_active = 0;
5141 uint32_t count_discard_inactive = 0;
5142 uint32_t count_discard_purgeable = 0;
5143 uint32_t count_discard_speculative = 0;
5144
5145 clock_get_uptime(&start);
5146
5147 m = (vm_page_t) queue_first(&vm_page_queue_zf);
5148 while (m && !queue_end(&vm_page_queue_zf, (queue_entry_t)m))
5149 {
5150 next = (vm_page_t) m->pageq.next;
5151 if (hibernate_page_bittst(page_list, m->phys_page))
5152 {
5153 if (m->dirty)
5154 count_discard_purgeable++;
5155 else
5156 count_discard_inactive++;
5157 hibernate_discard_page(m);
5158 }
5159 m = next;
5160 }
5161
5162 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5163 {
5164 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5165 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5166 {
5167 next = (vm_page_t) m->pageq.next;
5168 if (hibernate_page_bittst(page_list, m->phys_page))
5169 {
5170 count_discard_speculative++;
5171 hibernate_discard_page(m);
5172 }
5173 m = next;
5174 }
5175 }
5176
5177 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5178 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5179 {
5180 next = (vm_page_t) m->pageq.next;
5181 if (hibernate_page_bittst(page_list, m->phys_page))
5182 {
5183 if (m->dirty)
5184 count_discard_purgeable++;
5185 else
5186 count_discard_inactive++;
5187 hibernate_discard_page(m);
5188 }
5189 m = next;
5190 }
5191
5192 m = (vm_page_t) queue_first(&vm_page_queue_active);
5193 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5194 {
5195 next = (vm_page_t) m->pageq.next;
5196 if (hibernate_page_bittst(page_list, m->phys_page))
5197 {
5198 if (m->dirty)
5199 count_discard_purgeable++;
5200 else
5201 count_discard_active++;
5202 hibernate_discard_page(m);
5203 }
5204 m = next;
5205 }
5206
5207 clock_get_uptime(&end);
5208 absolutetime_to_nanoseconds(end - start, &nsec);
5209 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d\n",
5210 nsec / 1000000ULL,
5211 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative);
5212 }
5213
5214 #endif /* HIBERNATION */
5215
5216 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5217
5218 #include <mach_vm_debug.h>
5219 #if MACH_VM_DEBUG
5220
5221 #include <mach_debug/hash_info.h>
5222 #include <vm/vm_debug.h>
5223
5224 /*
5225 * Routine: vm_page_info
5226 * Purpose:
5227 * Return information about the global VP table.
5228 * Fills the buffer with as much information as possible
5229 * and returns the desired size of the buffer.
5230 * Conditions:
5231 * Nothing locked. The caller should provide
5232 * possibly-pageable memory.
5233 */
5234
5235 unsigned int
5236 vm_page_info(
5237 hash_info_bucket_t *info,
5238 unsigned int count)
5239 {
5240 unsigned int i;
5241 lck_spin_t *bucket_lock;
5242
5243 if (vm_page_bucket_count < count)
5244 count = vm_page_bucket_count;
5245
5246 for (i = 0; i < count; i++) {
5247 vm_page_bucket_t *bucket = &vm_page_buckets[i];
5248 unsigned int bucket_count = 0;
5249 vm_page_t m;
5250
5251 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
5252 lck_spin_lock(bucket_lock);
5253
5254 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
5255 bucket_count++;
5256
5257 lck_spin_unlock(bucket_lock);
5258
5259 /* don't touch pageable memory while holding locks */
5260 info[i].hib_count = bucket_count;
5261 }
5262
5263 return vm_page_bucket_count;
5264 }
5265 #endif /* MACH_VM_DEBUG */
5266
5267 #include <mach_kdb.h>
5268 #if MACH_KDB
5269
5270 #include <ddb/db_output.h>
5271 #include <vm/vm_print.h>
5272 #define printf kdbprintf
5273
5274 /*
5275 * Routine: vm_page_print [exported]
5276 */
5277 void
5278 vm_page_print(
5279 db_addr_t db_addr)
5280 {
5281 vm_page_t p;
5282
5283 p = (vm_page_t) (long) db_addr;
5284
5285 iprintf("page 0x%x\n", p);
5286
5287 db_indent += 2;
5288
5289 iprintf("object=0x%x", p->object);
5290 printf(", offset=0x%x", p->offset);
5291 printf(", wire_count=%d", p->wire_count);
5292
5293 iprintf("%slocal, %sinactive, %sactive, %sthrottled, %sgobbled, %slaundry, %sfree, %sref, %sencrypted\n",
5294 (p->local ? "" : "!"),
5295 (p->inactive ? "" : "!"),
5296 (p->active ? "" : "!"),
5297 (p->throttled ? "" : "!"),
5298 (p->gobbled ? "" : "!"),
5299 (p->laundry ? "" : "!"),
5300 (p->free ? "" : "!"),
5301 (p->reference ? "" : "!"),
5302 (p->encrypted ? "" : "!"));
5303 iprintf("%sbusy, %swanted, %stabled, %sfictitious, %sprivate, %sprecious\n",
5304 (p->busy ? "" : "!"),
5305 (p->wanted ? "" : "!"),
5306 (p->tabled ? "" : "!"),
5307 (p->fictitious ? "" : "!"),
5308 (p->private ? "" : "!"),
5309 (p->precious ? "" : "!"));
5310 iprintf("%sabsent, %serror, %sdirty, %scleaning, %spageout, %sclustered\n",
5311 (p->absent ? "" : "!"),
5312 (p->error ? "" : "!"),
5313 (p->dirty ? "" : "!"),
5314 (p->cleaning ? "" : "!"),
5315 (p->pageout ? "" : "!"),
5316 (p->clustered ? "" : "!"));
5317 iprintf("%soverwriting, %srestart, %sunusual\n",
5318 (p->overwriting ? "" : "!"),
5319 (p->restart ? "" : "!"),
5320 (p->unusual ? "" : "!"));
5321
5322 iprintf("phys_page=0x%x", p->phys_page);
5323
5324 db_indent -= 2;
5325 }
5326 #endif /* MACH_KDB */