1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <pexpert/pexpert.h>
89
90 #include <vm/vm_protos.h>
91 #include <vm/memory_object.h>
92 #include <vm/vm_purgeable_internal.h>
93 #include <vm/vm_compressor.h>
94
95 #include <IOKit/IOHibernatePrivate.h>
96
97 #include <sys/kdebug.h>
98
99 boolean_t hibernate_cleaning_in_progress = FALSE;
100 boolean_t vm_page_free_verify = TRUE;
101
102 uint32_t vm_lopage_free_count = 0;
103 uint32_t vm_lopage_free_limit = 0;
104 uint32_t vm_lopage_lowater = 0;
105 boolean_t vm_lopage_refill = FALSE;
106 boolean_t vm_lopage_needed = FALSE;
107
108 lck_mtx_ext_t vm_page_queue_lock_ext;
109 lck_mtx_ext_t vm_page_queue_free_lock_ext;
110 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
111
112 int speculative_age_index = 0;
113 int speculative_steal_index = 0;
114 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
115
116
117 __private_extern__ void vm_page_init_lck_grp(void);
118
119 static void vm_page_free_prepare(vm_page_t page);
120 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
121
122
123
124
125 /*
126 * Associated with page of user-allocatable memory is a
127 * page structure.
128 */
129
130 /*
131 * These variables record the values returned by vm_page_bootstrap,
132 * for debugging purposes. The implementation of pmap_steal_memory
133 * and pmap_startup here also uses them internally.
134 */
135
136 vm_offset_t virtual_space_start;
137 vm_offset_t virtual_space_end;
138 uint32_t vm_page_pages;
139
140 /*
141 * The vm_page_lookup() routine, which provides for fast
142 * (virtual memory object, offset) to page lookup, employs
143 * the following hash table. The vm_page_{insert,remove}
144 * routines install and remove associations in the table.
145 * [This table is often called the virtual-to-physical,
146 * or VP, table.]
147 */
148 typedef struct {
149 vm_page_t pages;
150 #if MACH_PAGE_HASH_STATS
151 int cur_count; /* current count */
152 int hi_count; /* high water mark */
153 #endif /* MACH_PAGE_HASH_STATS */
154 } vm_page_bucket_t;
155
156
157 #define BUCKETS_PER_LOCK 16
158
159 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
160 unsigned int vm_page_bucket_count = 0; /* How big is array? */
161 unsigned int vm_page_hash_mask; /* Mask for hash function */
162 unsigned int vm_page_hash_shift; /* Shift for hash function */
163 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
164 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
165
166 lck_spin_t *vm_page_bucket_locks;
167
168 #if VM_PAGE_BUCKETS_CHECK
169 boolean_t vm_page_buckets_check_ready = FALSE;
170 #if VM_PAGE_FAKE_BUCKETS
171 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
172 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
173 #endif /* VM_PAGE_FAKE_BUCKETS */
174 #endif /* VM_PAGE_BUCKETS_CHECK */
175
176 #if MACH_PAGE_HASH_STATS
177 /* This routine is only for debug. It is intended to be called by
178 * hand by a developer using a kernel debugger. This routine prints
179 * out vm_page_hash table statistics to the kernel debug console.
180 */
181 void
182 hash_debug(void)
183 {
184 int i;
185 int numbuckets = 0;
186 int highsum = 0;
187 int maxdepth = 0;
188
189 for (i = 0; i < vm_page_bucket_count; i++) {
190 if (vm_page_buckets[i].hi_count) {
191 numbuckets++;
192 highsum += vm_page_buckets[i].hi_count;
193 if (vm_page_buckets[i].hi_count > maxdepth)
194 maxdepth = vm_page_buckets[i].hi_count;
195 }
196 }
197 printf("Total number of buckets: %d\n", vm_page_bucket_count);
198 printf("Number used buckets: %d = %d%%\n",
199 numbuckets, 100*numbuckets/vm_page_bucket_count);
200 printf("Number unused buckets: %d = %d%%\n",
201 vm_page_bucket_count - numbuckets,
202 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
203 printf("Sum of bucket max depth: %d\n", highsum);
204 printf("Average bucket depth: %d.%2d\n",
205 highsum/vm_page_bucket_count,
206 highsum%vm_page_bucket_count);
207 printf("Maximum bucket depth: %d\n", maxdepth);
208 }
209 #endif /* MACH_PAGE_HASH_STATS */
210
211 /*
212 * The virtual page size is currently implemented as a runtime
213 * variable, but is constant once initialized using vm_set_page_size.
214 * This initialization must be done in the machine-dependent
215 * bootstrap sequence, before calling other machine-independent
216 * initializations.
217 *
218 * All references to the virtual page size outside this
219 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
220 * constants.
221 */
222 vm_size_t page_size = PAGE_SIZE;
223 vm_size_t page_mask = PAGE_MASK;
224 int page_shift = PAGE_SHIFT;
225
226 /*
227 * Resident page structures are initialized from
228 * a template (see vm_page_alloc).
229 *
230 * When adding a new field to the virtual memory
231 * object structure, be sure to add initialization
232 * (see vm_page_bootstrap).
233 */
234 struct vm_page vm_page_template;
235
236 vm_page_t vm_pages = VM_PAGE_NULL;
237 unsigned int vm_pages_count = 0;
238 ppnum_t vm_page_lowest = 0;
239
240 /*
241 * Resident pages that represent real memory
242 * are allocated from a set of free lists,
243 * one per color.
244 */
245 unsigned int vm_colors;
246 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
247 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
248 queue_head_t vm_page_queue_free[MAX_COLORS];
249 unsigned int vm_page_free_wanted;
250 unsigned int vm_page_free_wanted_privileged;
251 unsigned int vm_page_free_count;
252 unsigned int vm_page_fictitious_count;
253
254 unsigned int vm_page_free_count_minimum; /* debugging */
255
256 /*
257 * Occasionally, the virtual memory system uses
258 * resident page structures that do not refer to
259 * real pages, for example to leave a page with
260 * important state information in the VP table.
261 *
262 * These page structures are allocated the way
263 * most other kernel structures are.
264 */
265 zone_t vm_page_zone;
266 vm_locks_array_t vm_page_locks;
267 decl_lck_mtx_data(,vm_page_alloc_lock)
268 lck_mtx_ext_t vm_page_alloc_lock_ext;
269
270 unsigned int io_throttle_zero_fill;
271
272 unsigned int vm_page_local_q_count = 0;
273 unsigned int vm_page_local_q_soft_limit = 250;
274 unsigned int vm_page_local_q_hard_limit = 500;
275 struct vplq *vm_page_local_q = NULL;
276
277 /* N.B. Guard and fictitious pages must not
278 * be assigned a zero phys_page value.
279 */
280 /*
281 * Fictitious pages don't have a physical address,
282 * but we must initialize phys_page to something.
283 * For debugging, this should be a strange value
284 * that the pmap module can recognize in assertions.
285 */
286 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
287
288 /*
289 * Guard pages are not accessible so they don't
290 * need a physical address, but we need to enter
291 * one in the pmap.
292 * Let's make it recognizable and make sure that
293 * we don't use a real physical page with that
294 * physical address.
295 */
296 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
297
298 /*
299 * Resident page structures are also chained on
300 * queues that are used by the page replacement
301 * system (pageout daemon). These queues are
302 * defined here, but are shared by the pageout
303 * module. The inactive queue is broken into
304 * file backed and anonymous for convenience as the
305 * pageout daemon often assignes a higher
306 * importance to anonymous pages (less likely to pick)
307 */
308 queue_head_t vm_page_queue_active;
309 queue_head_t vm_page_queue_inactive;
310 queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
311 queue_head_t vm_page_queue_throttled;
312
313 unsigned int vm_page_active_count;
314 unsigned int vm_page_inactive_count;
315 unsigned int vm_page_anonymous_count;
316 unsigned int vm_page_throttled_count;
317 unsigned int vm_page_speculative_count;
318 unsigned int vm_page_wire_count;
319 unsigned int vm_page_wire_count_initial;
320 unsigned int vm_page_gobble_count = 0;
321 unsigned int vm_page_wire_count_warning = 0;
322 unsigned int vm_page_gobble_count_warning = 0;
323
324 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
325 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
326 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
327
328 unsigned int vm_page_external_count = 0;
329 unsigned int vm_page_internal_count = 0;
330 unsigned int vm_page_pageable_external_count = 0;
331 unsigned int vm_page_pageable_internal_count = 0;
332
333 #if DEVELOPMENT || DEBUG
334 unsigned int vm_page_speculative_recreated = 0;
335 unsigned int vm_page_speculative_created = 0;
336 unsigned int vm_page_speculative_used = 0;
337 #endif
338
339 queue_head_t vm_page_queue_cleaned;
340
341 unsigned int vm_page_cleaned_count = 0;
342 unsigned int vm_pageout_enqueued_cleaned = 0;
343
344 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
345 ppnum_t max_valid_low_ppnum = 0xffffffff;
346
347
348 /*
349 * Several page replacement parameters are also
350 * shared with this module, so that page allocation
351 * (done here in vm_page_alloc) can trigger the
352 * pageout daemon.
353 */
354 unsigned int vm_page_free_target = 0;
355 unsigned int vm_page_free_min = 0;
356 unsigned int vm_page_throttle_limit = 0;
357 uint32_t vm_page_creation_throttle = 0;
358 unsigned int vm_page_inactive_target = 0;
359 unsigned int vm_page_anonymous_min = 0;
360 unsigned int vm_page_inactive_min = 0;
361 unsigned int vm_page_free_reserved = 0;
362 unsigned int vm_page_throttle_count = 0;
363
364
365 /*
366 * The VM system has a couple of heuristics for deciding
367 * that pages are "uninteresting" and should be placed
368 * on the inactive queue as likely candidates for replacement.
369 * These variables let the heuristics be controlled at run-time
370 * to make experimentation easier.
371 */
372
373 boolean_t vm_page_deactivate_hint = TRUE;
374
375 struct vm_page_stats_reusable vm_page_stats_reusable;
376
377 /*
378 * vm_set_page_size:
379 *
380 * Sets the page size, perhaps based upon the memory
381 * size. Must be called before any use of page-size
382 * dependent functions.
383 *
384 * Sets page_shift and page_mask from page_size.
385 */
386 void
387 vm_set_page_size(void)
388 {
389 page_mask = page_size - 1;
390
391 if ((page_mask & page_size) != 0)
392 panic("vm_set_page_size: page size not a power of two");
393
394 for (page_shift = 0; ; page_shift++)
395 if ((1U << page_shift) == page_size)
396 break;
397 }
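/*
 * Worked example (illustrative only, not part of the original source):
 * with page_size == 4096, page_mask becomes 0xFFF and the loop above
 * stops at page_shift == 12, since (1U << 12) == 4096.  A size that is
 * not a power of two, e.g. 4095, would leave bits set in
 * (page_mask & page_size) and hit the panic instead.
 */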
398
399
400 /* Called once during startup, once the cache geometry is known.
401 */
402 static void
403 vm_page_set_colors( void )
404 {
405 unsigned int n, override;
406
407 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
408 n = override;
409 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
410 n = vm_cache_geometry_colors;
411 else n = DEFAULT_COLORS; /* use default if all else fails */
412
413 if ( n == 0 )
414 n = 1;
415 if ( n > MAX_COLORS )
416 n = MAX_COLORS;
417
418 /* the count must be a power of 2 */
419 if ( ( n & (n - 1)) != 0 )
420 panic("vm_page_set_colors");
421
422 vm_colors = n;
423 vm_color_mask = n - 1;
424 }
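/*
 * Illustrative note (an assumption about how these values are used
 * elsewhere in this module, not shown in this section): if the cache
 * geometry reports 8 colors, vm_colors == 8 and vm_color_mask == 7,
 * so a page's free-queue color can be picked with a cheap
 * (phys_page & vm_color_mask); the power-of-two check above is what
 * makes that masking valid.
 */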
425
426
427 lck_grp_t vm_page_lck_grp_free;
428 lck_grp_t vm_page_lck_grp_queue;
429 lck_grp_t vm_page_lck_grp_local;
430 lck_grp_t vm_page_lck_grp_purge;
431 lck_grp_t vm_page_lck_grp_alloc;
432 lck_grp_t vm_page_lck_grp_bucket;
433 lck_grp_attr_t vm_page_lck_grp_attr;
434 lck_attr_t vm_page_lck_attr;
435
436
437 __private_extern__ void
438 vm_page_init_lck_grp(void)
439 {
440 /*
441 * initialize the vm_page lock world
442 */
443 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
444 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
445 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
446 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
447 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
448 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
449 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
450 lck_attr_setdefault(&vm_page_lck_attr);
451 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
452
453 vm_compressor_init_locks();
454 }
455
456 void
457 vm_page_init_local_q()
458 {
459 unsigned int num_cpus;
460 unsigned int i;
461 struct vplq *t_local_q;
462
463 num_cpus = ml_get_max_cpus();
464
465 /*
466 * no point in this for a uni-processor system
467 */
468 if (num_cpus >= 2) {
469 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
470
471 for (i = 0; i < num_cpus; i++) {
472 struct vpl *lq;
473
474 lq = &t_local_q[i].vpl_un.vpl;
475 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
476 queue_init(&lq->vpl_queue);
477 lq->vpl_count = 0;
478 lq->vpl_internal_count = 0;
479 lq->vpl_external_count = 0;
480 }
481 vm_page_local_q_count = num_cpus;
482
483 vm_page_local_q = (struct vplq *)t_local_q;
484 }
485 }
486
487
488 /*
489 * vm_page_bootstrap:
490 *
491 * Initializes the resident memory module.
492 *
493 * Allocates memory for the page cells, and
494 * for the object/offset-to-page hash table headers.
495 * Each page cell is initialized and placed on the free list.
496 * Returns the range of available kernel virtual memory.
497 */
498
499 void
500 vm_page_bootstrap(
501 vm_offset_t *startp,
502 vm_offset_t *endp)
503 {
504 register vm_page_t m;
505 unsigned int i;
506 unsigned int log1;
507 unsigned int log2;
508 unsigned int size;
509
510 /*
511 * Initialize the vm_page template.
512 */
513
514 m = &vm_page_template;
515 bzero(m, sizeof (*m));
516
517 m->pageq.next = NULL;
518 m->pageq.prev = NULL;
519 m->listq.next = NULL;
520 m->listq.prev = NULL;
521 m->next = VM_PAGE_NULL;
522
523 m->object = VM_OBJECT_NULL; /* reset later */
524 m->offset = (vm_object_offset_t) -1; /* reset later */
525
526 m->wire_count = 0;
527 m->local = FALSE;
528 m->inactive = FALSE;
529 m->active = FALSE;
530 m->pageout_queue = FALSE;
531 m->speculative = FALSE;
532 m->laundry = FALSE;
533 m->free = FALSE;
534 m->reference = FALSE;
535 m->gobbled = FALSE;
536 m->private = FALSE;
537 m->throttled = FALSE;
538 m->__unused_pageq_bits = 0;
539
540 m->phys_page = 0; /* reset later */
541
542 m->busy = TRUE;
543 m->wanted = FALSE;
544 m->tabled = FALSE;
545 m->hashed = FALSE;
546 m->fictitious = FALSE;
547 m->pmapped = FALSE;
548 m->wpmapped = FALSE;
549 m->pageout = FALSE;
550 m->absent = FALSE;
551 m->error = FALSE;
552 m->dirty = FALSE;
553 m->cleaning = FALSE;
554 m->precious = FALSE;
555 m->clustered = FALSE;
556 m->overwriting = FALSE;
557 m->restart = FALSE;
558 m->unusual = FALSE;
559 m->encrypted = FALSE;
560 m->encrypted_cleaning = FALSE;
561 m->cs_validated = FALSE;
562 m->cs_tainted = FALSE;
563 m->no_cache = FALSE;
564 m->reusable = FALSE;
565 m->slid = FALSE;
566 m->was_dirty = FALSE;
567 m->xpmapped = FALSE;
568 m->compressor = FALSE;
569 m->written_by_kernel = FALSE;
570 m->__unused_object_bits = 0;
571
572 /*
573 * Initialize the page queues.
574 */
575 vm_page_init_lck_grp();
576
577 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
578 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
579 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
580
581 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
582 int group;
583
584 purgeable_queues[i].token_q_head = 0;
585 purgeable_queues[i].token_q_tail = 0;
586 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
587 queue_init(&purgeable_queues[i].objq[group]);
588
589 purgeable_queues[i].type = i;
590 purgeable_queues[i].new_pages = 0;
591 #if MACH_ASSERT
592 purgeable_queues[i].debug_count_tokens = 0;
593 purgeable_queues[i].debug_count_objects = 0;
594 #endif
595 };
596
597 for (i = 0; i < MAX_COLORS; i++ )
598 queue_init(&vm_page_queue_free[i]);
599
600 queue_init(&vm_lopage_queue_free);
601 queue_init(&vm_page_queue_active);
602 queue_init(&vm_page_queue_inactive);
603 queue_init(&vm_page_queue_cleaned);
604 queue_init(&vm_page_queue_throttled);
605 queue_init(&vm_page_queue_anonymous);
606
607 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
608 queue_init(&vm_page_queue_speculative[i].age_q);
609
610 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
611 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
612 }
613 vm_page_free_wanted = 0;
614 vm_page_free_wanted_privileged = 0;
615
616 vm_page_set_colors();
617
618
619 /*
620 * Steal memory for the map and zone subsystems.
621 */
622 zone_steal_memory();
623 vm_map_steal_memory();
624
625 /*
626 * Allocate (and initialize) the virtual-to-physical
627 * table hash buckets.
628 *
629 * The number of buckets should be a power of two to
630 * get a good hash function. The following computation
631 * chooses the first power of two that is greater
632 * than the number of physical pages in the system.
633 */
634
635 if (vm_page_bucket_count == 0) {
636 unsigned int npages = pmap_free_pages();
637
638 vm_page_bucket_count = 1;
639 while (vm_page_bucket_count < npages)
640 vm_page_bucket_count <<= 1;
641 }
642 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
643
644 vm_page_hash_mask = vm_page_bucket_count - 1;
645
646 /*
647 * Calculate object shift value for hashing algorithm:
648 * O = log2(sizeof(struct vm_object))
649 * B = log2(vm_page_bucket_count)
650 * hash shifts the object left by
651 * B/2 - O
652 */
653 size = vm_page_bucket_count;
654 for (log1 = 0; size > 1; log1++)
655 size /= 2;
656 size = sizeof(struct vm_object);
657 for (log2 = 0; size > 1; log2++)
658 size /= 2;
659 vm_page_hash_shift = log1/2 - log2 + 1;
660
661 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
662 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
663 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure a unique series */
664
665 if (vm_page_hash_mask & vm_page_bucket_count)
666 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
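/*
 * Worked example (illustrative, assuming about a million -- at most
 * 2^20 -- physical pages): vm_page_bucket_count rounds up to 2^20, so
 * vm_page_hash_mask == 0xFFFFF and, with BUCKETS_PER_LOCK == 16,
 * vm_page_bucket_lock_count == 65536.  With log1 == 20 the two shifts
 * above set bits 10 and 5 of vm_page_bucket_hash, and the final "|= 1"
 * keeps the multiplier odd, as the comment above requires for a
 * unique series.
 */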
667
668 #if VM_PAGE_BUCKETS_CHECK
669 #if VM_PAGE_FAKE_BUCKETS
670 /*
671 * Allocate a decoy set of page buckets, to detect
672 * any stomping there.
673 */
674 vm_page_fake_buckets = (vm_page_bucket_t *)
675 pmap_steal_memory(vm_page_bucket_count *
676 sizeof(vm_page_bucket_t));
677 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
678 vm_page_fake_buckets_end =
679 vm_map_round_page((vm_page_fake_buckets_start +
680 (vm_page_bucket_count *
681 sizeof (vm_page_bucket_t))),
682 PAGE_MASK);
683 char *cp;
684 for (cp = (char *)vm_page_fake_buckets_start;
685 cp < (char *)vm_page_fake_buckets_end;
686 cp++) {
687 *cp = 0x5a;
688 }
689 #endif /* VM_PAGE_FAKE_BUCKETS */
690 #endif /* VM_PAGE_BUCKETS_CHECK */
691
692 vm_page_buckets = (vm_page_bucket_t *)
693 pmap_steal_memory(vm_page_bucket_count *
694 sizeof(vm_page_bucket_t));
695
696 vm_page_bucket_locks = (lck_spin_t *)
697 pmap_steal_memory(vm_page_bucket_lock_count *
698 sizeof(lck_spin_t));
699
700 for (i = 0; i < vm_page_bucket_count; i++) {
701 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
702
703 bucket->pages = VM_PAGE_NULL;
704 #if MACH_PAGE_HASH_STATS
705 bucket->cur_count = 0;
706 bucket->hi_count = 0;
707 #endif /* MACH_PAGE_HASH_STATS */
708 }
709
710 for (i = 0; i < vm_page_bucket_lock_count; i++)
711 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
712
713 #if VM_PAGE_BUCKETS_CHECK
714 vm_page_buckets_check_ready = TRUE;
715 #endif /* VM_PAGE_BUCKETS_CHECK */
716
717 /*
718 * Machine-dependent code allocates the resident page table.
719 * It uses vm_page_init to initialize the page frames.
720 * The code also returns to us the virtual space available
721 * to the kernel. We don't trust the pmap module
722 * to get the alignment right.
723 */
724
725 pmap_startup(&virtual_space_start, &virtual_space_end);
726 virtual_space_start = round_page(virtual_space_start);
727 virtual_space_end = trunc_page(virtual_space_end);
728
729 *startp = virtual_space_start;
730 *endp = virtual_space_end;
731
732 /*
733 * Compute the initial "wire" count.
734 * Up until now, the pages which have been set aside are not under
735 * the VM system's control, so although they aren't explicitly
736 * wired, they nonetheless can't be moved. At this moment,
737 * all VM managed pages are "free", courtesy of pmap_startup.
738 */
739 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
740 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
741 vm_page_wire_count_initial = vm_page_wire_count;
742 vm_page_free_count_minimum = vm_page_free_count;
743
744 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
745 vm_page_free_count, vm_page_wire_count);
746
747 simple_lock_init(&vm_paging_lock, 0);
748 }
749
750 #ifndef MACHINE_PAGES
751 /*
752 * We implement pmap_steal_memory and pmap_startup with the help
753 * of two simpler functions, pmap_virtual_space and pmap_next_page.
754 */
755
756 void *
757 pmap_steal_memory(
758 vm_size_t size)
759 {
760 vm_offset_t addr, vaddr;
761 ppnum_t phys_page;
762
763 /*
764 * We round the size up to a multiple of the pointer size.
765 */
766
767 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
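	/*
	 * e.g. on LP64, sizeof (void *) == 8, so a 12-byte request is
	 * rounded up to 16 here; a size that is already pointer-aligned
	 * is left unchanged.
	 */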
768
769 /*
770 * If this is the first call to pmap_steal_memory,
771 * we have to initialize ourself.
772 */
773
774 if (virtual_space_start == virtual_space_end) {
775 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
776
777 /*
778 * The initial values must be aligned properly, and
779 * we don't trust the pmap module to do it right.
780 */
781
782 virtual_space_start = round_page(virtual_space_start);
783 virtual_space_end = trunc_page(virtual_space_end);
784 }
785
786 /*
787 * Allocate virtual memory for this request.
788 */
789
790 addr = virtual_space_start;
791 virtual_space_start += size;
792
793 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
794
795 /*
796 * Allocate and map physical pages to back new virtual pages.
797 */
798
799 for (vaddr = round_page(addr);
800 vaddr < addr + size;
801 vaddr += PAGE_SIZE) {
802
803 if (!pmap_next_page_hi(&phys_page))
804 panic("pmap_steal_memory");
805
806 /*
807 * XXX Logically, these mappings should be wired,
808 * but some pmap modules barf if they are.
809 */
810 #if defined(__LP64__)
811 pmap_pre_expand(kernel_pmap, vaddr);
812 #endif
813
814 pmap_enter(kernel_pmap, vaddr, phys_page,
815 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
816 VM_WIMG_USE_DEFAULT, FALSE);
817 /*
818 * Account for newly stolen memory
819 */
820 vm_page_wire_count++;
821
822 }
823
824 return (void *) addr;
825 }
826
827 void
828 pmap_startup(
829 vm_offset_t *startp,
830 vm_offset_t *endp)
831 {
832 unsigned int i, npages, pages_initialized, fill, fillval;
833 ppnum_t phys_page;
834 addr64_t tmpaddr;
835
836 /*
837 * We calculate how many page frames we will have
838 * and then allocate the page structures in one chunk.
839 */
840
841 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
842 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
843 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* How many pages we can cover, leaving room for their vm_page_ts */
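	/*
	 * Rationale for the divisor above (descriptive comment added for
	 * clarity): each page we end up managing costs PAGE_SIZE bytes of
	 * physical memory plus one struct vm_page to describe it, so
	 * dividing by (PAGE_SIZE + sizeof(*vm_pages)) leaves room for the
	 * vm_pages array stolen just below.
	 */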
844
845 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
846
847 /*
848 * Initialize the page frames.
849 */
850 for (i = 0, pages_initialized = 0; i < npages; i++) {
851 if (!pmap_next_page(&phys_page))
852 break;
853 if (pages_initialized == 0 || phys_page < vm_page_lowest)
854 vm_page_lowest = phys_page;
855
856 vm_page_init(&vm_pages[i], phys_page, FALSE);
857 vm_page_pages++;
858 pages_initialized++;
859 }
860 vm_pages_count = pages_initialized;
861
862 /*
863 * Check if we want to initialize pages to a known value
864 */
865 fill = 0; /* Assume no fill */
866 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
867 #if DEBUG
868 /* This slows down booting the DEBUG kernel, particularly on
869 * large memory systems, but is worthwhile in deterministically
870 * trapping uninitialized memory usage.
871 */
872 if (fill == 0) {
873 fill = 1;
874 fillval = 0xDEB8F177;
875 }
876 #endif
877 if (fill)
878 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
879 // -debug code remove
880 if (2 == vm_himemory_mode) {
881 // free low -> high so high is preferred
882 for (i = 1; i <= pages_initialized; i++) {
883 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
884 vm_page_release(&vm_pages[i - 1]);
885 }
886 }
887 else
888 // debug code remove-
889
890 /*
891 * Release pages in reverse order so that physical pages
892 * initially get allocated in ascending addresses. This keeps
893 * the devices (which must address physical memory) happy if
894 * they require several consecutive pages.
895 */
896 for (i = pages_initialized; i > 0; i--) {
897 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
898 vm_page_release(&vm_pages[i - 1]);
899 }
900
901 #if 0
902 {
903 vm_page_t xx, xxo, xxl;
904 int i, j, k, l;
905
906 j = 0; /* (BRINGUP) */
907 xxl = 0;
908
909 for( i = 0; i < vm_colors; i++ ) {
910 queue_iterate(&vm_page_queue_free[i],
911 xx,
912 vm_page_t,
913 pageq) { /* BRINGUP */
914 j++; /* (BRINGUP) */
915 if(j > vm_page_free_count) { /* (BRINGUP) */
916 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
917 }
918
919 l = vm_page_free_count - j; /* (BRINGUP) */
920 k = 0; /* (BRINGUP) */
921
922 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
923
924 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
925 k++;
926 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
927 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
928 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
929 }
930 }
931
932 xxl = xx;
933 }
934 }
935
936 if(j != vm_page_free_count) { /* (BRINGUP) */
937 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
938 }
939 }
940 #endif
941
942
943 /*
944 * We have to re-align virtual_space_start,
945 * because pmap_steal_memory has been using it.
946 */
947
948 virtual_space_start = round_page(virtual_space_start);
949
950 *startp = virtual_space_start;
951 *endp = virtual_space_end;
952 }
953 #endif /* MACHINE_PAGES */
954
955 /*
956 * Routine: vm_page_module_init
957 * Purpose:
958 * Second initialization pass, to be done after
959 * the basic VM system is ready.
960 */
961 void
962 vm_page_module_init(void)
963 {
964 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
965 0, PAGE_SIZE, "vm pages");
966
967 #if ZONE_DEBUG
968 zone_debug_disable(vm_page_zone);
969 #endif /* ZONE_DEBUG */
970
971 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
972 zone_change(vm_page_zone, Z_EXPAND, FALSE);
973 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
974 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
975 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
976 /*
977 * Adjust zone statistics to account for the real pages allocated
978 * in vm_page_create(). [Q: is this really what we want?]
979 */
980 vm_page_zone->count += vm_page_pages;
981 vm_page_zone->sum_count += vm_page_pages;
982 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
983 }
984
985 /*
986 * Routine: vm_page_create
987 * Purpose:
988 * After the VM system is up, machine-dependent code
989 * may stumble across more physical memory. For example,
990 * memory that it was reserving for a frame buffer.
991 * vm_page_create turns this memory into available pages.
992 */
993
994 void
995 vm_page_create(
996 ppnum_t start,
997 ppnum_t end)
998 {
999 ppnum_t phys_page;
1000 vm_page_t m;
1001
1002 for (phys_page = start;
1003 phys_page < end;
1004 phys_page++) {
1005 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1006 == VM_PAGE_NULL)
1007 vm_page_more_fictitious();
1008
1009 m->fictitious = FALSE;
1010 pmap_clear_noencrypt(phys_page);
1011
1012 vm_page_pages++;
1013 vm_page_release(m);
1014 }
1015 }
1016
1017 /*
1018 * vm_page_hash:
1019 *
1020 * Distributes the object/offset key pair among hash buckets.
1021 *
1022 * NOTE: The bucket count must be a power of 2
1023 */
1024 #define vm_page_hash(object, offset) (\
1025 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1026 & vm_page_hash_mask)
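/*
 * Usage sketch (illustrative; it mirrors the pattern used by
 * vm_page_insert_internal() and vm_page_lookup() below):
 *
 *	hash_id     = vm_page_hash(object, offset);
 *	bucket      = &vm_page_buckets[hash_id];
 *	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 *
 * vm_page_bucket_hash is kept odd (see vm_page_bootstrap) so that
 * multiplication by it remains a one-to-one mapping of the object
 * pointer before the result is masked down to a bucket index.
 */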
1027
1028
1029 /*
1030 * vm_page_insert: [ internal use only ]
1031 *
1032 * Inserts the given mem entry into the object/object-page
1033 * table and object list.
1034 *
1035 * The object must be locked.
1036 */
1037 void
1038 vm_page_insert(
1039 vm_page_t mem,
1040 vm_object_t object,
1041 vm_object_offset_t offset)
1042 {
1043 vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
1044 }
1045
1046 void
1047 vm_page_insert_internal(
1048 vm_page_t mem,
1049 vm_object_t object,
1050 vm_object_offset_t offset,
1051 boolean_t queues_lock_held,
1052 boolean_t insert_in_hash,
1053 boolean_t batch_pmap_op)
1054 {
1055 vm_page_bucket_t *bucket;
1056 lck_spin_t *bucket_lock;
1057 int hash_id;
1058
1059 XPR(XPR_VM_PAGE,
1060 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1061 object, offset, mem, 0,0);
1062 #if 0
1063 /*
1064 * we may not hold the page queue lock
1065 * so this check isn't safe to make
1066 */
1067 VM_PAGE_CHECK(mem);
1068 #endif
1069
1070 assert(page_aligned(offset));
1071
1072 if (object == vm_submap_object) {
1073 /* the vm_submap_object is only a placeholder for submaps */
1074 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1075 }
1076
1077 vm_object_lock_assert_exclusive(object);
1078 #if DEBUG
1079 lck_mtx_assert(&vm_page_queue_lock,
1080 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1081 : LCK_MTX_ASSERT_NOTOWNED);
1082 #endif /* DEBUG */
1083
1084 if (insert_in_hash == TRUE) {
1085 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1086 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1087 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1088 "already in (obj=%p,off=0x%llx)",
1089 mem, object, offset, mem->object, mem->offset);
1090 #endif
1091 assert(!object->internal || offset < object->vo_size);
1092
1093 /* only insert "pageout" pages into "pageout" objects,
1094 * and normal pages into normal objects */
1095 assert(object->pageout == mem->pageout);
1096
1097 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1098
1099 /*
1100 * Record the object/offset pair in this page
1101 */
1102
1103 mem->object = object;
1104 mem->offset = offset;
1105
1106 /*
1107 * Insert it into the object/offset hash table
1108 */
1109 hash_id = vm_page_hash(object, offset);
1110 bucket = &vm_page_buckets[hash_id];
1111 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1112
1113 lck_spin_lock(bucket_lock);
1114
1115 mem->next = bucket->pages;
1116 bucket->pages = mem;
1117 #if MACH_PAGE_HASH_STATS
1118 if (++bucket->cur_count > bucket->hi_count)
1119 bucket->hi_count = bucket->cur_count;
1120 #endif /* MACH_PAGE_HASH_STATS */
1121 mem->hashed = TRUE;
1122 lck_spin_unlock(bucket_lock);
1123 }
1124
1125 {
1126 unsigned int cache_attr;
1127
1128 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1129
1130 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1131 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1132 }
1133 }
1134 /*
1135 * Now link into the object's list of backed pages.
1136 */
1137 VM_PAGE_INSERT(mem, object);
1138 mem->tabled = TRUE;
1139
1140 /*
1141 * Show that the object has one more resident page.
1142 */
1143
1144 object->resident_page_count++;
1145 if (VM_PAGE_WIRED(mem)) {
1146 object->wired_page_count++;
1147 }
1148 assert(object->resident_page_count >= object->wired_page_count);
1149
1150 if (object->internal) {
1151 OSAddAtomic(1, &vm_page_internal_count);
1152 } else {
1153 OSAddAtomic(1, &vm_page_external_count);
1154 }
1155
1156 /*
1157 * It wouldn't make sense to insert a "reusable" page in
1158 * an object (the page would have been marked "reusable" only
1159 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1160 * in the object at that time).
1161 * But a page could be inserted in an "all_reusable" object, if
1162 * something faults it in (a vm_read() from another task or a
1163 * "use-after-free" issue in user space, for example). It can
1164 * also happen if we're relocating a page from that object to
1165 * a different physical page during a physically-contiguous
1166 * allocation.
1167 */
1168 assert(!mem->reusable);
1169 if (mem->object->all_reusable) {
1170 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1171 }
1172
1173 if (object->purgable == VM_PURGABLE_VOLATILE) {
1174 if (VM_PAGE_WIRED(mem)) {
1175 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1176 } else {
1177 OSAddAtomic(1, &vm_page_purgeable_count);
1178 }
1179 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1180 mem->throttled) {
1181 /*
1182 * This page belongs to a purged VM object but hasn't
1183 * been purged (because it was "busy").
1184 * It's in the "throttled" queue and hence not
1185 * visible to vm_pageout_scan(). Move it to a pageable
1186 * queue, so that it can eventually be reclaimed, instead
1187 * of lingering in the "empty" object.
1188 */
1189 if (queues_lock_held == FALSE)
1190 vm_page_lockspin_queues();
1191 vm_page_deactivate(mem);
1192 if (queues_lock_held == FALSE)
1193 vm_page_unlock_queues();
1194 }
1195 }
1196
1197 /*
1198 * vm_page_replace:
1199 *
1200 * Exactly like vm_page_insert, except that we first
1201 * remove any existing page at the given offset in object.
1202 *
1203 * The object must be locked.
1204 */
1205 void
1206 vm_page_replace(
1207 register vm_page_t mem,
1208 register vm_object_t object,
1209 register vm_object_offset_t offset)
1210 {
1211 vm_page_bucket_t *bucket;
1212 vm_page_t found_m = VM_PAGE_NULL;
1213 lck_spin_t *bucket_lock;
1214 int hash_id;
1215
1216 #if 0
1217 /*
1218 * we don't hold the page queue lock
1219 * so this check isn't safe to make
1220 */
1221 VM_PAGE_CHECK(mem);
1222 #endif
1223 vm_object_lock_assert_exclusive(object);
1224 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1225 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1226 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1227 "already in (obj=%p,off=0x%llx)",
1228 mem, object, offset, mem->object, mem->offset);
1229 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1230 #endif
1231 /*
1232 * Record the object/offset pair in this page
1233 */
1234
1235 mem->object = object;
1236 mem->offset = offset;
1237
1238 /*
1239 * Insert it into the object/offset hash table,
1240 * replacing any page that might have been there.
1241 */
1242
1243 hash_id = vm_page_hash(object, offset);
1244 bucket = &vm_page_buckets[hash_id];
1245 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1246
1247 lck_spin_lock(bucket_lock);
1248
1249 if (bucket->pages) {
1250 vm_page_t *mp = &bucket->pages;
1251 vm_page_t m = *mp;
1252
1253 do {
1254 if (m->object == object && m->offset == offset) {
1255 /*
1256 * Remove old page from hash list
1257 */
1258 *mp = m->next;
1259 m->hashed = FALSE;
1260
1261 found_m = m;
1262 break;
1263 }
1264 mp = &m->next;
1265 } while ((m = *mp));
1266
1267 mem->next = bucket->pages;
1268 } else {
1269 mem->next = VM_PAGE_NULL;
1270 }
1271 /*
1272 * insert new page at head of hash list
1273 */
1274 bucket->pages = mem;
1275 mem->hashed = TRUE;
1276
1277 lck_spin_unlock(bucket_lock);
1278
1279 if (found_m) {
1280 /*
1281 * there was already a page at the specified
1282 * offset for this object... remove it from
1283 * the object and free it back to the free list
1284 */
1285 vm_page_free_unlocked(found_m, FALSE);
1286 }
1287 vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1288 }
1289
1290 /*
1291 * vm_page_remove: [ internal use only ]
1292 *
1293 * Removes the given mem entry from the object/offset-page
1294 * table and the object page list.
1295 *
1296 * The object must be locked.
1297 */
1298
1299 void
1300 vm_page_remove(
1301 vm_page_t mem,
1302 boolean_t remove_from_hash)
1303 {
1304 vm_page_bucket_t *bucket;
1305 vm_page_t this;
1306 lck_spin_t *bucket_lock;
1307 int hash_id;
1308
1309 XPR(XPR_VM_PAGE,
1310 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1311 mem->object, mem->offset,
1312 mem, 0,0);
1313
1314 vm_object_lock_assert_exclusive(mem->object);
1315 assert(mem->tabled);
1316 assert(!mem->cleaning);
1317 assert(!mem->laundry);
1318 #if 0
1319 /*
1320 * we don't hold the page queue lock
1321 * so this check isn't safe to make
1322 */
1323 VM_PAGE_CHECK(mem);
1324 #endif
1325 if (remove_from_hash == TRUE) {
1326 /*
1327 * Remove from the object/offset hash table
1328 */
1329 hash_id = vm_page_hash(mem->object, mem->offset);
1330 bucket = &vm_page_buckets[hash_id];
1331 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1332
1333 lck_spin_lock(bucket_lock);
1334
1335 if ((this = bucket->pages) == mem) {
1336 /* optimize for common case */
1337
1338 bucket->pages = mem->next;
1339 } else {
1340 vm_page_t *prev;
1341
1342 for (prev = &this->next;
1343 (this = *prev) != mem;
1344 prev = &this->next)
1345 continue;
1346 *prev = this->next;
1347 }
1348 #if MACH_PAGE_HASH_STATS
1349 bucket->cur_count--;
1350 #endif /* MACH_PAGE_HASH_STATS */
1351 mem->hashed = FALSE;
1352 lck_spin_unlock(bucket_lock);
1353 }
1354 /*
1355 * Now remove from the object's list of backed pages.
1356 */
1357
1358 VM_PAGE_REMOVE(mem);
1359
1360 /*
1361 * And show that the object has one fewer resident
1362 * page.
1363 */
1364
1365 assert(mem->object->resident_page_count > 0);
1366 mem->object->resident_page_count--;
1367
1368 if (mem->object->internal) {
1369 assert(vm_page_internal_count);
1370 OSAddAtomic(-1, &vm_page_internal_count);
1371 } else {
1372 assert(vm_page_external_count);
1373 OSAddAtomic(-1, &vm_page_external_count);
1374 }
1375 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1376 if (mem->object->resident_page_count == 0)
1377 vm_object_cache_remove(mem->object);
1378 }
1379
1380 if (VM_PAGE_WIRED(mem)) {
1381 assert(mem->object->wired_page_count > 0);
1382 mem->object->wired_page_count--;
1383 }
1384 assert(mem->object->resident_page_count >=
1385 mem->object->wired_page_count);
1386 if (mem->reusable) {
1387 assert(mem->object->reusable_page_count > 0);
1388 mem->object->reusable_page_count--;
1389 assert(mem->object->reusable_page_count <=
1390 mem->object->resident_page_count);
1391 mem->reusable = FALSE;
1392 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1393 vm_page_stats_reusable.reused_remove++;
1394 } else if (mem->object->all_reusable) {
1395 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1396 vm_page_stats_reusable.reused_remove++;
1397 }
1398
1399 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1400 if (VM_PAGE_WIRED(mem)) {
1401 assert(vm_page_purgeable_wired_count > 0);
1402 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1403 } else {
1404 assert(vm_page_purgeable_count > 0);
1405 OSAddAtomic(-1, &vm_page_purgeable_count);
1406 }
1407 }
1408 if (mem->object->set_cache_attr == TRUE)
1409 pmap_set_cache_attributes(mem->phys_page, 0);
1410
1411 mem->tabled = FALSE;
1412 mem->object = VM_OBJECT_NULL;
1413 mem->offset = (vm_object_offset_t) -1;
1414 }
1415
1416
1417 /*
1418 * vm_page_lookup:
1419 *
1420 * Returns the page associated with the object/offset
1421 * pair specified; if none is found, VM_PAGE_NULL is returned.
1422 *
1423 * The object must be locked. No side effects.
1424 */
1425
1426 unsigned long vm_page_lookup_hint = 0;
1427 unsigned long vm_page_lookup_hint_next = 0;
1428 unsigned long vm_page_lookup_hint_prev = 0;
1429 unsigned long vm_page_lookup_hint_miss = 0;
1430 unsigned long vm_page_lookup_bucket_NULL = 0;
1431 unsigned long vm_page_lookup_miss = 0;
1432
1433
1434 vm_page_t
1435 vm_page_lookup(
1436 vm_object_t object,
1437 vm_object_offset_t offset)
1438 {
1439 vm_page_t mem;
1440 vm_page_bucket_t *bucket;
1441 queue_entry_t qe;
1442 lck_spin_t *bucket_lock;
1443 int hash_id;
1444
1445 vm_object_lock_assert_held(object);
1446 mem = object->memq_hint;
1447
1448 if (mem != VM_PAGE_NULL) {
1449 assert(mem->object == object);
1450
1451 if (mem->offset == offset) {
1452 vm_page_lookup_hint++;
1453 return mem;
1454 }
1455 qe = queue_next(&mem->listq);
1456
1457 if (! queue_end(&object->memq, qe)) {
1458 vm_page_t next_page;
1459
1460 next_page = (vm_page_t) qe;
1461 assert(next_page->object == object);
1462
1463 if (next_page->offset == offset) {
1464 vm_page_lookup_hint_next++;
1465 object->memq_hint = next_page; /* new hint */
1466 return next_page;
1467 }
1468 }
1469 qe = queue_prev(&mem->listq);
1470
1471 if (! queue_end(&object->memq, qe)) {
1472 vm_page_t prev_page;
1473
1474 prev_page = (vm_page_t) qe;
1475 assert(prev_page->object == object);
1476
1477 if (prev_page->offset == offset) {
1478 vm_page_lookup_hint_prev++;
1479 object->memq_hint = prev_page; /* new hint */
1480 return prev_page;
1481 }
1482 }
1483 }
1484 /*
1485 * Search the hash table for this object/offset pair
1486 */
1487 hash_id = vm_page_hash(object, offset);
1488 bucket = &vm_page_buckets[hash_id];
1489
1490 /*
1491 * since we hold the object lock, we are guaranteed that no
1492 * new pages can be inserted into this object... this in turn
1493 * guarantees that the page we're looking for can't exist
1494 * if the bucket it hashes to is currently NULL even when looked
1495 * at outside the scope of the hash bucket lock... this is a
1496 * really cheap optimization to avoid taking the lock
1497 */
1498 if (bucket->pages == VM_PAGE_NULL) {
1499 vm_page_lookup_bucket_NULL++;
1500
1501 return (VM_PAGE_NULL);
1502 }
1503 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1504
1505 lck_spin_lock(bucket_lock);
1506
1507 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1508 #if 0
1509 /*
1510 * we don't hold the page queue lock
1511 * so this check isn't safe to make
1512 */
1513 VM_PAGE_CHECK(mem);
1514 #endif
1515 if ((mem->object == object) && (mem->offset == offset))
1516 break;
1517 }
1518 lck_spin_unlock(bucket_lock);
1519
1520 if (mem != VM_PAGE_NULL) {
1521 if (object->memq_hint != VM_PAGE_NULL) {
1522 vm_page_lookup_hint_miss++;
1523 }
1524 assert(mem->object == object);
1525 object->memq_hint = mem;
1526 } else
1527 vm_page_lookup_miss++;
1528
1529 return(mem);
1530 }
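/*
 * Minimal usage sketch (illustrative only; "object", "offset" and "m"
 * are placeholders).  The object lock must be held across the call,
 * as asserted above:
 *
 *	vm_object_lock(object);
 *	m = vm_page_lookup(object, offset);
 *	if (m != VM_PAGE_NULL) {
 *		... use m while the object lock is still held ...
 *	}
 *	vm_object_unlock(object);
 */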
1531
1532
1533 /*
1534 * vm_page_rename:
1535 *
1536 * Move the given memory entry from its
1537 * current object to the specified target object/offset.
1538 *
1539 * The object must be locked.
1540 */
1541 void
1542 vm_page_rename(
1543 register vm_page_t mem,
1544 register vm_object_t new_object,
1545 vm_object_offset_t new_offset,
1546 boolean_t encrypted_ok)
1547 {
1548 boolean_t internal_to_external, external_to_internal;
1549
1550 assert(mem->object != new_object);
1551
1552 /*
1553 * ENCRYPTED SWAP:
1554 * The encryption key is based on the page's memory object
1555 * (aka "pager") and paging offset. Moving the page to
1556 * another VM object changes its "pager" and "paging_offset"
1557 * so it has to be decrypted first, or we would lose the key.
1558 *
1559 * One exception is VM object collapsing, where we transfer pages
1560 * from one backing object to its parent object. This operation also
1561 * transfers the paging information, so the <pager,paging_offset> info
1562 * should remain consistent. The caller (vm_object_do_collapse())
1563 * sets "encrypted_ok" in this case.
1564 */
1565 if (!encrypted_ok && mem->encrypted) {
1566 panic("vm_page_rename: page %p is encrypted\n", mem);
1567 }
1568
1569 XPR(XPR_VM_PAGE,
1570 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1571 new_object, new_offset,
1572 mem, 0,0);
1573
1574 /*
1575 * Changes to mem->object require the page lock because
1576 * the pageout daemon uses that lock to get the object.
1577 */
1578 vm_page_lockspin_queues();
1579
1580 internal_to_external = FALSE;
1581 external_to_internal = FALSE;
1582
1583 if (mem->local) {
1584 /*
1585 * it's much easier to get the vm_page_pageable_xxx accounting correct
1586 * if we first move the page to the active queue... it's going to end
1587 * up there anyway, and we don't call vm_page_rename frequently enough
1588 * for this to matter.
1589 */
1590 VM_PAGE_QUEUES_REMOVE(mem);
1591 vm_page_activate(mem);
1592 }
1593 if (mem->active || mem->inactive || mem->speculative) {
1594 if (mem->object->internal && !new_object->internal) {
1595 internal_to_external = TRUE;
1596 }
1597 if (!mem->object->internal && new_object->internal) {
1598 external_to_internal = TRUE;
1599 }
1600 }
1601
1602 vm_page_remove(mem, TRUE);
1603 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1604
1605 if (internal_to_external) {
1606 vm_page_pageable_internal_count--;
1607 vm_page_pageable_external_count++;
1608 } else if (external_to_internal) {
1609 vm_page_pageable_external_count--;
1610 vm_page_pageable_internal_count++;
1611 }
1612
1613 vm_page_unlock_queues();
1614 }
1615
1616 /*
1617 * vm_page_init:
1618 *
1619 * Initialize the fields in a new page.
1620 * This takes a structure with random values and initializes it
1621 * so that it can be given to vm_page_release or vm_page_insert.
1622 */
1623 void
1624 vm_page_init(
1625 vm_page_t mem,
1626 ppnum_t phys_page,
1627 boolean_t lopage)
1628 {
1629 assert(phys_page);
1630
1631 #if DEBUG
1632 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1633 if (!(pmap_valid_page(phys_page))) {
1634 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1635 }
1636 }
1637 #endif
1638 *mem = vm_page_template;
1639 mem->phys_page = phys_page;
1640 #if 0
1641 /*
1642 * we're leaving this turned off for now... currently pages
1643 * come off the free list and are either immediately dirtied/referenced
1644 * due to zero-fill or COW faults, or are used to read or write files...
1645 * in the file I/O case, the UPL mechanism takes care of clearing
1646 * the state of the HW ref/mod bits in a somewhat fragile way.
1647 * Since we may change the way this works in the future (to toughen it up),
1648 * I'm leaving this as a reminder of where these bits could get cleared
1649 */
1650
1651 /*
1652 * make sure both the h/w referenced and modified bits are
1653 * clear at this point... we are especially dependent on
1654 * not finding a 'stale' h/w modified in a number of spots
1655 * once this page goes back into use
1656 */
1657 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1658 #endif
1659 mem->lopage = lopage;
1660 }
1661
1662 /*
1663 * vm_page_grab_fictitious:
1664 *
1665 * Remove a fictitious page from the free list.
1666 * Returns VM_PAGE_NULL if there are no free pages.
1667 */
1668 int c_vm_page_grab_fictitious = 0;
1669 int c_vm_page_grab_fictitious_failed = 0;
1670 int c_vm_page_release_fictitious = 0;
1671 int c_vm_page_more_fictitious = 0;
1672
1673 vm_page_t
1674 vm_page_grab_fictitious_common(
1675 ppnum_t phys_addr)
1676 {
1677 vm_page_t m;
1678
1679 if ((m = (vm_page_t)zget(vm_page_zone))) {
1680
1681 vm_page_init(m, phys_addr, FALSE);
1682 m->fictitious = TRUE;
1683
1684 c_vm_page_grab_fictitious++;
1685 } else
1686 c_vm_page_grab_fictitious_failed++;
1687
1688 return m;
1689 }
1690
1691 vm_page_t
1692 vm_page_grab_fictitious(void)
1693 {
1694 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1695 }
1696
1697 vm_page_t
1698 vm_page_grab_guard(void)
1699 {
1700 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1701 }
1702
1703
1704 /*
1705 * vm_page_release_fictitious:
1706 *
1707 * Release a fictitious page to the zone pool
1708 */
1709 void
1710 vm_page_release_fictitious(
1711 vm_page_t m)
1712 {
1713 assert(!m->free);
1714 assert(m->fictitious);
1715 assert(m->phys_page == vm_page_fictitious_addr ||
1716 m->phys_page == vm_page_guard_addr);
1717
1718 c_vm_page_release_fictitious++;
1719
1720 zfree(vm_page_zone, m);
1721 }
1722
1723 /*
1724 * vm_page_more_fictitious:
1725 *
1726 * Add more fictitious pages to the zone.
1727 * Allowed to block. This routine is way intimate
1728 * with the zones code, for several reasons:
1729 * 1. we need to carve some page structures out of physical
1730 * memory before zones work, so they _cannot_ come from
1731 * the zone_map.
1732 * 2. the zone needs to be collectable in order to prevent
1733 * growth without bound. These structures are used by
1734 * the device pager (by the hundreds and thousands), as
1735 * private pages for pageout, and as blocking pages for
1736 * pagein. Temporary bursts in demand should not result in
1737 * permanent allocation of a resource.
1738 * 3. To smooth allocation humps, we allocate single pages
1739 * with kernel_memory_allocate(), and cram them into the
1740 * zone.
1741 */
1742
1743 void vm_page_more_fictitious(void)
1744 {
1745 vm_offset_t addr;
1746 kern_return_t retval;
1747
1748 c_vm_page_more_fictitious++;
1749
1750 /*
1751 * Allocate a single page from the zone_map. Do not wait if no physical
1752 * pages are immediately available, and do not zero the space. We need
1753 * our own blocking lock here to prevent having multiple,
1754 * simultaneous requests from piling up on the zone_map lock. Exactly
1755 * one (of our) threads should be potentially waiting on the map lock.
1756 * If winner is not vm-privileged, then the page allocation will fail,
1757 * and it will temporarily block here in the vm_page_wait().
1758 */
1759 lck_mtx_lock(&vm_page_alloc_lock);
1760 /*
1761 * If another thread allocated space, just bail out now.
1762 */
1763 if (zone_free_count(vm_page_zone) > 5) {
1764 /*
1765 * The number "5" is a small number that is larger than the
1766 * number of fictitious pages that any single caller will
1767 * attempt to allocate. Otherwise, a thread will attempt to
1768 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1769 * release all of the resources and locks already acquired,
1770 * and then call this routine. This routine finds the pages
1771 * that the caller released, so fails to allocate new space.
1772 * The process repeats infinitely. The largest known number
1773 * of fictitious pages required in this manner is 2. 5 is
1774 * simply a somewhat larger number.
1775 */
1776 lck_mtx_unlock(&vm_page_alloc_lock);
1777 return;
1778 }
1779
1780 retval = kernel_memory_allocate(zone_map,
1781 &addr, PAGE_SIZE, VM_PROT_ALL,
1782 KMA_KOBJECT|KMA_NOPAGEWAIT);
1783 if (retval != KERN_SUCCESS) {
1784 /*
1785 * No page was available. Drop the
1786 * lock to give another thread a chance at it, and
1787 * wait for the pageout daemon to make progress.
1788 */
1789 lck_mtx_unlock(&vm_page_alloc_lock);
1790 vm_page_wait(THREAD_UNINT);
1791 return;
1792 }
1793
1794 /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
1795 OSAddAtomic64(1, &(vm_page_zone->page_count));
1796
1797 zcram(vm_page_zone, addr, PAGE_SIZE);
1798
1799 lck_mtx_unlock(&vm_page_alloc_lock);
1800 }
1801
1802
1803 /*
1804 * vm_pool_low():
1805 *
1806 * Return true if it is not likely that a non-vm_privileged thread
1807 * can get memory without blocking. Advisory only, since the
1808 * situation may change under us.
1809 */
1810 int
1811 vm_pool_low(void)
1812 {
1813 /* No locking, at worst we will fib. */
1814 return( vm_page_free_count <= vm_page_free_reserved );
1815 }
1816
1817
1818
1819 /*
1820 * this is an interface to support bring-up of drivers
1821 * on platforms with physical memory > 4G...
1822 */
1823 int vm_himemory_mode = 0;
1824
1825
1826 /*
1827 * this interface exists to support hardware controllers
1828 * incapable of generating DMAs with more than 32 bits
1829 * of address on platforms with physical memory > 4G...
1830 */
1831 unsigned int vm_lopages_allocated_q = 0;
1832 unsigned int vm_lopages_allocated_cpm_success = 0;
1833 unsigned int vm_lopages_allocated_cpm_failed = 0;
1834 queue_head_t vm_lopage_queue_free;
1835
1836 vm_page_t
1837 vm_page_grablo(void)
1838 {
1839 vm_page_t mem;
1840
1841 if (vm_lopage_needed == FALSE)
1842 return (vm_page_grab());
1843
1844 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1845
1846 if ( !queue_empty(&vm_lopage_queue_free)) {
1847 queue_remove_first(&vm_lopage_queue_free,
1848 mem,
1849 vm_page_t,
1850 pageq);
1851 assert(vm_lopage_free_count);
1852
1853 vm_lopage_free_count--;
1854 vm_lopages_allocated_q++;
1855
1856 if (vm_lopage_free_count < vm_lopage_lowater)
1857 vm_lopage_refill = TRUE;
1858
1859 lck_mtx_unlock(&vm_page_queue_free_lock);
1860 } else {
1861 lck_mtx_unlock(&vm_page_queue_free_lock);
1862
1863 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1864
1865 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1866 vm_lopages_allocated_cpm_failed++;
1867 lck_mtx_unlock(&vm_page_queue_free_lock);
1868
1869 return (VM_PAGE_NULL);
1870 }
1871 mem->busy = TRUE;
1872
1873 vm_page_lockspin_queues();
1874
1875 mem->gobbled = FALSE;
1876 vm_page_gobble_count--;
1877 vm_page_wire_count--;
1878
1879 vm_lopages_allocated_cpm_success++;
1880 vm_page_unlock_queues();
1881 }
1882 assert(mem->busy);
1883 assert(!mem->free);
1884 assert(!mem->pmapped);
1885 assert(!mem->wpmapped);
1886 assert(!pmap_is_noencrypt(mem->phys_page));
1887
1888 mem->pageq.next = NULL;
1889 mem->pageq.prev = NULL;
1890
1891 return (mem);
1892 }
1893
1894
1895 /*
1896 * vm_page_grab:
1897 *
1898 * first try to grab a page from the per-cpu free list...
1899 * this must be done while pre-emption is disabled... if
1900 * a page is available, we're done...
1901 * if no page is available, grab the vm_page_queue_free_lock
1902 * and see if current number of free pages would allow us
1903 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1904 * if there are pages available, disable preemption and
1905 * recheck the state of the per-cpu free list... we could
1906 * have been preempted and moved to a different cpu, or
1907 * some other thread could have re-filled it... if still
1908 * empty, figure out how many pages we can steal from the
1909 * global free queue and move to the per-cpu queue...
1910 * return one of these pages when done... only wake up the
1911 * pageout_scan thread if we moved pages from the global
1912 * list... no need for the wakeup if we've satisfied the
1913 * request from the per-cpu queue.
1914 */
1915
1916 #define COLOR_GROUPS_TO_STEAL 4
1917
1918
1919 vm_page_t
1920 vm_page_grab( void )
1921 {
1922 vm_page_t mem;
1923
1924
1925 disable_preemption();
1926
1927 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1928 return_page_from_cpu_list:
1929 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1930 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1931 mem->pageq.next = NULL;
1932
1933 enable_preemption();
1934
1935 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1936 assert(mem->tabled == FALSE);
1937 assert(mem->object == VM_OBJECT_NULL);
1938 assert(!mem->laundry);
1939 assert(!mem->free);
1940 assert(pmap_verify_free(mem->phys_page));
1941 assert(mem->busy);
1942 assert(!mem->encrypted);
1943 assert(!mem->pmapped);
1944 assert(!mem->wpmapped);
1945 assert(!mem->active);
1946 assert(!mem->inactive);
1947 assert(!mem->throttled);
1948 assert(!mem->speculative);
1949 assert(!pmap_is_noencrypt(mem->phys_page));
1950
1951 return mem;
1952 }
1953 enable_preemption();
1954
1955
1956 /*
1957 * Optionally produce warnings if the wire or gobble
1958 * counts exceed some threshold.
1959 */
1960 if (vm_page_wire_count_warning > 0
1961 && vm_page_wire_count >= vm_page_wire_count_warning) {
1962 printf("mk: vm_page_grab(): high wired page count of %d\n",
1963 vm_page_wire_count);
1964 assert(vm_page_wire_count < vm_page_wire_count_warning);
1965 }
1966 if (vm_page_gobble_count_warning > 0
1967 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1968 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1969 vm_page_gobble_count);
1970 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1971 }
1972
1973 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1974
1975 /*
1976 * Only let privileged threads (involved in pageout)
1977 * dip into the reserved pool.
1978 */
1979 if ((vm_page_free_count < vm_page_free_reserved) &&
1980 !(current_thread()->options & TH_OPT_VMPRIV)) {
1981 lck_mtx_unlock(&vm_page_queue_free_lock);
1982 mem = VM_PAGE_NULL;
1983 }
1984 else {
1985 vm_page_t head;
1986 vm_page_t tail;
1987 unsigned int pages_to_steal;
1988 unsigned int color;
1989
1990 while ( vm_page_free_count == 0 ) {
1991
1992 lck_mtx_unlock(&vm_page_queue_free_lock);
1993 /*
1994 * must be a privileged thread to be
1995 * in this state since a non-privileged
1996 * thread would have bailed if we were
1997 * under the vm_page_free_reserved mark
1998 */
1999 VM_PAGE_WAIT();
2000 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2001 }
2002
2003 disable_preemption();
2004
2005 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2006 lck_mtx_unlock(&vm_page_queue_free_lock);
2007
2008 /*
2009 * we got preempted and moved to another processor
2010 * or we got preempted and someone else ran and filled the cache
2011 */
2012 goto return_page_from_cpu_list;
2013 }
2014 if (vm_page_free_count <= vm_page_free_reserved)
2015 pages_to_steal = 1;
2016 else {
2017 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
2018
2019 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
2020 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2021 }
2022 color = PROCESSOR_DATA(current_processor(), start_color);
2023 head = tail = NULL;
2024
2025 while (pages_to_steal--) {
2026 if (--vm_page_free_count < vm_page_free_count_minimum)
2027 vm_page_free_count_minimum = vm_page_free_count;
2028
2029 while (queue_empty(&vm_page_queue_free[color]))
2030 color = (color + 1) & vm_color_mask;
2031
2032 queue_remove_first(&vm_page_queue_free[color],
2033 mem,
2034 vm_page_t,
2035 pageq);
2036 mem->pageq.next = NULL;
2037 mem->pageq.prev = NULL;
2038
2039 assert(!mem->active);
2040 assert(!mem->inactive);
2041 assert(!mem->throttled);
2042 assert(!mem->speculative);
2043
2044 color = (color + 1) & vm_color_mask;
2045
2046 if (head == NULL)
2047 head = mem;
2048 else
2049 tail->pageq.next = (queue_t)mem;
2050 tail = mem;
2051
2052 mem->pageq.prev = NULL;
2053 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2054 assert(mem->tabled == FALSE);
2055 assert(mem->object == VM_OBJECT_NULL);
2056 assert(!mem->laundry);
2057 assert(mem->free);
2058 mem->free = FALSE;
2059
2060 assert(pmap_verify_free(mem->phys_page));
2061 assert(mem->busy);
2062 assert(!mem->free);
2063 assert(!mem->encrypted);
2064 assert(!mem->pmapped);
2065 assert(!mem->wpmapped);
2066 assert(!pmap_is_noencrypt(mem->phys_page));
2067 }
2068 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2069 PROCESSOR_DATA(current_processor(), start_color) = color;
2070
2071 /*
2072 * satisfy this request
2073 */
2074 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2075 mem = head;
2076 mem->pageq.next = NULL;
2077
2078 lck_mtx_unlock(&vm_page_queue_free_lock);
2079
2080 enable_preemption();
2081 }
2082 /*
2083 * Decide if we should poke the pageout daemon.
2084 * We do this if the free count is less than the low
2085 * water mark, or if the free count is less than the high
2086 * water mark (but above the low water mark) and the inactive
2087 * count is less than its target.
2088 *
2089 * We don't have the counts locked ... if they change a little,
2090 * it doesn't really matter.
2091 */
2092 if ((vm_page_free_count < vm_page_free_min) ||
2093 ((vm_page_free_count < vm_page_free_target) &&
2094 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2095 thread_wakeup((event_t) &vm_page_free_wanted);
2096
2097 VM_CHECK_MEMORYSTATUS;
2098
2099 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2100
2101 return mem;
2102 }
2103
2104 /*
2105 * vm_page_release:
2106 *
2107 * Return a page to the free list.
2108 */
2109
2110 void
2111 vm_page_release(
2112 register vm_page_t mem)
2113 {
2114 unsigned int color;
2115 int need_wakeup = 0;
2116 int need_priv_wakeup = 0;
2117
2118
2119 assert(!mem->private && !mem->fictitious);
2120 if (vm_page_free_verify) {
2121 assert(pmap_verify_free(mem->phys_page));
2122 }
2123 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2124
2125 pmap_clear_noencrypt(mem->phys_page);
2126
2127 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2128 #if DEBUG
2129 if (mem->free)
2130 panic("vm_page_release");
2131 #endif
2132
2133 assert(mem->busy);
2134 assert(!mem->laundry);
2135 assert(mem->object == VM_OBJECT_NULL);
2136 assert(mem->pageq.next == NULL &&
2137 mem->pageq.prev == NULL);
2138 assert(mem->listq.next == NULL &&
2139 mem->listq.prev == NULL);
2140
2141 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2142 vm_lopage_free_count < vm_lopage_free_limit &&
2143 mem->phys_page < max_valid_low_ppnum) {
2144 /*
2145 * this exists to support hardware controllers
2146 * incapable of generating DMAs with more than 32 bits
2147 * of address on platforms with physical memory > 4G...
2148 */
2149 queue_enter_first(&vm_lopage_queue_free,
2150 mem,
2151 vm_page_t,
2152 pageq);
2153 vm_lopage_free_count++;
2154
2155 if (vm_lopage_free_count >= vm_lopage_free_limit)
2156 vm_lopage_refill = FALSE;
2157
2158 mem->lopage = TRUE;
2159 } else {
2160 mem->lopage = FALSE;
2161 mem->free = TRUE;
2162
2163 color = mem->phys_page & vm_color_mask;
2164 queue_enter_first(&vm_page_queue_free[color],
2165 mem,
2166 vm_page_t,
2167 pageq);
2168 vm_page_free_count++;
2169 /*
2170 * Check if we should wake up someone waiting for page.
2171 * But don't bother waking them unless they can allocate.
2172 *
2173 * We wake up only one thread, to prevent starvation.
2174 * Because the scheduling system handles wait queues FIFO,
2175 * if we wake up all waiting threads, one greedy thread
2176 * can starve multiple well-behaved threads. When the threads
2177 * all wake up, the greedy thread runs first, grabs the page,
2178 * and waits for another page. It will be the first to run
2179 * when the next page is freed.
2180 *
2181 * However, there is a slight danger here.
2182 * The thread we wake might not use the free page.
2183 * Then the other threads could wait indefinitely
2184 * while the page goes unused. To forestall this,
2185 * the pageout daemon will keep making free pages
2186 * as long as vm_page_free_wanted is non-zero.
2187 */
2188
2189 assert(vm_page_free_count > 0);
2190 if (vm_page_free_wanted_privileged > 0) {
2191 vm_page_free_wanted_privileged--;
2192 need_priv_wakeup = 1;
2193 } else if (vm_page_free_wanted > 0 &&
2194 vm_page_free_count > vm_page_free_reserved) {
2195 vm_page_free_wanted--;
2196 need_wakeup = 1;
2197 }
2198 }
2199 lck_mtx_unlock(&vm_page_queue_free_lock);
2200
2201 if (need_priv_wakeup)
2202 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2203 else if (need_wakeup)
2204 thread_wakeup_one((event_t) &vm_page_free_count);
2205
2206 VM_CHECK_MEMORYSTATUS;
2207 }
2208
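/*
 * Illustrative sketch, not part of the original file: the free queues
 * above are indexed by page "color", which is simply the low bits of
 * the physical page number. With vm_colors == 8 (so vm_color_mask == 7),
 * physical page 0x12345 would land on free queue 0x12345 & 7 == 5.
 * The color count used here is an assumed example value.
 */
static unsigned int
example_free_queue_color(vm_page_t m)
{
	/* same computation vm_page_release() uses to pick a queue */
	return (m->phys_page & vm_color_mask);
}
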
2209 /*
2210 * vm_page_wait:
2211 *
2212 * Wait for a page to become available.
2213 * If there are plenty of free pages, then we don't sleep.
2214 *
2215 * Returns:
2216 * TRUE: There may be another page, try again
2217 * FALSE: We were interrupted out of our wait, don't try again
2218 */
2219
2220 boolean_t
2221 vm_page_wait(
2222 int interruptible )
2223 {
2224 /*
2225 * We can't use vm_page_free_reserved to make this
2226 * determination. Consider: some thread might
2227 * need to allocate two pages. The first allocation
2228 * succeeds, the second fails. After the first page is freed,
2229 * a call to vm_page_wait must really block.
2230 */
2231 kern_return_t wait_result;
2232 int need_wakeup = 0;
2233 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2234
2235 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2236
2237 if (is_privileged && vm_page_free_count) {
2238 lck_mtx_unlock(&vm_page_queue_free_lock);
2239 return TRUE;
2240 }
2241 if (vm_page_free_count < vm_page_free_target) {
2242
2243 if (is_privileged) {
2244 if (vm_page_free_wanted_privileged++ == 0)
2245 need_wakeup = 1;
2246 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2247 } else {
2248 if (vm_page_free_wanted++ == 0)
2249 need_wakeup = 1;
2250 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2251 }
2252 lck_mtx_unlock(&vm_page_queue_free_lock);
2253 counter(c_vm_page_wait_block++);
2254
2255 if (need_wakeup)
2256 thread_wakeup((event_t)&vm_page_free_wanted);
2257
2258 if (wait_result == THREAD_WAITING) {
2259 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2260 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
2261 wait_result = thread_block(THREAD_CONTINUE_NULL);
2262 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2263 }
2264
2265 return(wait_result == THREAD_AWAKENED);
2266 } else {
2267 lck_mtx_unlock(&vm_page_queue_free_lock);
2268 return TRUE;
2269 }
2270 }
2271
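/*
 * Illustrative sketch, not part of the original file: the grab/wait
 * retry loop that callers which can sleep typically wrap around
 * vm_page_grab() (the same pattern appears in the fallback path of
 * vm_page_part_zero_fill() below).
 */
static vm_page_t
example_grab_page_blocking(void)
{
	vm_page_t m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		/* free pool exhausted; sleep until pages are released */
		if (vm_page_wait(THREAD_UNINT) == FALSE)
			break;	/* interrupted out of the wait; give up */
	}
	return m;
}
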
2272 /*
2273 * vm_page_alloc:
2274 *
2275 * Allocate and return a memory cell associated
2276 * with this VM object/offset pair.
2277 *
2278 * Object must be locked.
2279 */
2280
2281 vm_page_t
2282 vm_page_alloc(
2283 vm_object_t object,
2284 vm_object_offset_t offset)
2285 {
2286 register vm_page_t mem;
2287
2288 vm_object_lock_assert_exclusive(object);
2289 mem = vm_page_grab();
2290 if (mem == VM_PAGE_NULL)
2291 return VM_PAGE_NULL;
2292
2293 vm_page_insert(mem, object, offset);
2294
2295 return(mem);
2296 }
2297
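/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * caller of vm_page_alloc(). The object must be locked exclusively
 * across the call, and a VM_PAGE_NULL return is handled by dropping
 * the lock, waiting for free pages, and retrying.
 */
static vm_page_t
example_alloc_page_for_object(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t m;

	for (;;) {
		vm_object_lock(object);
		m = vm_page_alloc(object, offset);
		vm_object_unlock(object);

		if (m != VM_PAGE_NULL)
			return m;
		/* never sleep with the object lock held */
		vm_page_wait(THREAD_UNINT);
	}
}
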
2298 vm_page_t
2299 vm_page_alloclo(
2300 vm_object_t object,
2301 vm_object_offset_t offset)
2302 {
2303 register vm_page_t mem;
2304
2305 vm_object_lock_assert_exclusive(object);
2306 mem = vm_page_grablo();
2307 if (mem == VM_PAGE_NULL)
2308 return VM_PAGE_NULL;
2309
2310 vm_page_insert(mem, object, offset);
2311
2312 return(mem);
2313 }
2314
2315
2316 /*
2317 * vm_page_alloc_guard:
2318 *
2319 * Allocate a fictitious page which will be used
2320 * as a guard page. The page will be inserted into
2321 * the object and returned to the caller.
2322 */
2323
2324 vm_page_t
2325 vm_page_alloc_guard(
2326 vm_object_t object,
2327 vm_object_offset_t offset)
2328 {
2329 register vm_page_t mem;
2330
2331 vm_object_lock_assert_exclusive(object);
2332 mem = vm_page_grab_guard();
2333 if (mem == VM_PAGE_NULL)
2334 return VM_PAGE_NULL;
2335
2336 vm_page_insert(mem, object, offset);
2337
2338 return(mem);
2339 }
2340
2341
2342 counter(unsigned int c_laundry_pages_freed = 0;)
2343
2344 /*
2345 * vm_page_free_prepare:
2346 *
2347 * Removes page from any queue it may be on
2348 * and disassociates it from its VM object.
2349 *
2350 * Object and page queues must be locked prior to entry.
2351 */
2352 static void
2353 vm_page_free_prepare(
2354 vm_page_t mem)
2355 {
2356 vm_page_free_prepare_queues(mem);
2357 vm_page_free_prepare_object(mem, TRUE);
2358 }
2359
2360
2361 void
2362 vm_page_free_prepare_queues(
2363 vm_page_t mem)
2364 {
2365 VM_PAGE_CHECK(mem);
2366 assert(!mem->free);
2367 assert(!mem->cleaning);
2368 #if DEBUG
2369 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2370 if (mem->free)
2371 panic("vm_page_free: freeing page on free list\n");
2372 #endif
2373 if (mem->object) {
2374 vm_object_lock_assert_exclusive(mem->object);
2375 }
2376 if (mem->laundry) {
2377 /*
2378 * We may have to free a page while it's being laundered
2379 * if we lost its pager (due to a forced unmount, for example).
2380 * We need to call vm_pageout_steal_laundry() before removing
2381 * the page from its VM object, so that we can remove it
2382 * from its pageout queue and adjust the laundry accounting
2383 */
2384 vm_pageout_steal_laundry(mem, TRUE);
2385 counter(++c_laundry_pages_freed);
2386 }
2387
2388 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2389
2390 if (VM_PAGE_WIRED(mem)) {
2391 if (mem->object) {
2392 assert(mem->object->wired_page_count > 0);
2393 mem->object->wired_page_count--;
2394 assert(mem->object->resident_page_count >=
2395 mem->object->wired_page_count);
2396
2397 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2398 OSAddAtomic(+1, &vm_page_purgeable_count);
2399 assert(vm_page_purgeable_wired_count > 0);
2400 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2401 }
2402 }
2403 if (!mem->private && !mem->fictitious)
2404 vm_page_wire_count--;
2405 mem->wire_count = 0;
2406 assert(!mem->gobbled);
2407 } else if (mem->gobbled) {
2408 if (!mem->private && !mem->fictitious)
2409 vm_page_wire_count--;
2410 vm_page_gobble_count--;
2411 }
2412 }
2413
2414
2415 void
2416 vm_page_free_prepare_object(
2417 vm_page_t mem,
2418 boolean_t remove_from_hash)
2419 {
2420 if (mem->tabled)
2421 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2422
2423 PAGE_WAKEUP(mem); /* clears wanted */
2424
2425 if (mem->private) {
2426 mem->private = FALSE;
2427 mem->fictitious = TRUE;
2428 mem->phys_page = vm_page_fictitious_addr;
2429 }
2430 if ( !mem->fictitious) {
2431 vm_page_init(mem, mem->phys_page, mem->lopage);
2432 }
2433 }
2434
2435
2436 /*
2437 * vm_page_free:
2438 *
2439 * Returns the given page to the free list,
2440 * disassociating it from any VM object.
2441 *
2442 * Object and page queues must be locked prior to entry.
2443 */
2444 void
2445 vm_page_free(
2446 vm_page_t mem)
2447 {
2448 vm_page_free_prepare(mem);
2449
2450 if (mem->fictitious) {
2451 vm_page_release_fictitious(mem);
2452 } else {
2453 vm_page_release(mem);
2454 }
2455 }
2456
2457
2458 void
2459 vm_page_free_unlocked(
2460 vm_page_t mem,
2461 boolean_t remove_from_hash)
2462 {
2463 vm_page_lockspin_queues();
2464 vm_page_free_prepare_queues(mem);
2465 vm_page_unlock_queues();
2466
2467 vm_page_free_prepare_object(mem, remove_from_hash);
2468
2469 if (mem->fictitious) {
2470 vm_page_release_fictitious(mem);
2471 } else {
2472 vm_page_release(mem);
2473 }
2474 }
2475
2476
2477 /*
2478 * Free a list of pages. The list can be up to several hundred pages,
2479 * as blocked up by vm_pageout_scan().
2480 * The big win is not having to take the free list lock once
2481 * per page.
2482 */
2483 void
2484 vm_page_free_list(
2485 vm_page_t freeq,
2486 boolean_t prepare_object)
2487 {
2488 vm_page_t mem;
2489 vm_page_t nxt;
2490 vm_page_t local_freeq;
2491 int pg_count;
2492
2493 while (freeq) {
2494
2495 pg_count = 0;
2496 local_freeq = VM_PAGE_NULL;
2497 mem = freeq;
2498
2499 /*
2500 * break up the processing into smaller chunks so
2501 * that we can 'pipeline' the pages onto the
2502 * free list w/o introducing too much
2503 * contention on the global free queue lock
2504 */
2505 while (mem && pg_count < 64) {
2506
2507 assert(!mem->inactive);
2508 assert(!mem->active);
2509 assert(!mem->throttled);
2510 assert(!mem->free);
2511 assert(!mem->speculative);
2512 assert(!VM_PAGE_WIRED(mem));
2513 assert(mem->pageq.prev == NULL);
2514
2515 nxt = (vm_page_t)(mem->pageq.next);
2516
2517 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2518 assert(pmap_verify_free(mem->phys_page));
2519 }
2520 if (prepare_object == TRUE)
2521 vm_page_free_prepare_object(mem, TRUE);
2522
2523 if (!mem->fictitious) {
2524 assert(mem->busy);
2525
2526 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2527 vm_lopage_free_count < vm_lopage_free_limit &&
2528 mem->phys_page < max_valid_low_ppnum) {
2529 mem->pageq.next = NULL;
2530 vm_page_release(mem);
2531 } else {
2532 /*
2533 * IMPORTANT: we can't set the page "free" here
2534 * because that would make the page eligible for
2535 * a physically-contiguous allocation (see
2536 * vm_page_find_contiguous()) right away (we don't
2537 * hold the vm_page_queue_free lock). That would
2538 * cause trouble because the page is not actually
2539 * in the free queue yet...
2540 */
2541 mem->pageq.next = (queue_entry_t)local_freeq;
2542 local_freeq = mem;
2543 pg_count++;
2544
2545 pmap_clear_noencrypt(mem->phys_page);
2546 }
2547 } else {
2548 assert(mem->phys_page == vm_page_fictitious_addr ||
2549 mem->phys_page == vm_page_guard_addr);
2550 vm_page_release_fictitious(mem);
2551 }
2552 mem = nxt;
2553 }
2554 freeq = mem;
2555
2556 if ( (mem = local_freeq) ) {
2557 unsigned int avail_free_count;
2558 unsigned int need_wakeup = 0;
2559 unsigned int need_priv_wakeup = 0;
2560
2561 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2562
2563 while (mem) {
2564 int color;
2565
2566 nxt = (vm_page_t)(mem->pageq.next);
2567
2568 assert(!mem->free);
2569 assert(mem->busy);
2570 mem->free = TRUE;
2571
2572 color = mem->phys_page & vm_color_mask;
2573 queue_enter_first(&vm_page_queue_free[color],
2574 mem,
2575 vm_page_t,
2576 pageq);
2577 mem = nxt;
2578 }
2579 vm_page_free_count += pg_count;
2580 avail_free_count = vm_page_free_count;
2581
2582 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2583
2584 if (avail_free_count < vm_page_free_wanted_privileged) {
2585 need_priv_wakeup = avail_free_count;
2586 vm_page_free_wanted_privileged -= avail_free_count;
2587 avail_free_count = 0;
2588 } else {
2589 need_priv_wakeup = vm_page_free_wanted_privileged;
2590 vm_page_free_wanted_privileged = 0;
2591 avail_free_count -= vm_page_free_wanted_privileged;
2592 }
2593 }
2594 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2595 unsigned int available_pages;
2596
2597 available_pages = avail_free_count - vm_page_free_reserved;
2598
2599 if (available_pages >= vm_page_free_wanted) {
2600 need_wakeup = vm_page_free_wanted;
2601 vm_page_free_wanted = 0;
2602 } else {
2603 need_wakeup = available_pages;
2604 vm_page_free_wanted -= available_pages;
2605 }
2606 }
2607 lck_mtx_unlock(&vm_page_queue_free_lock);
2608
2609 if (need_priv_wakeup != 0) {
2610 /*
2611 * There shouldn't be that many VM-privileged threads,
2612 * so let's wake them all up, even if we don't quite
2613 * have enough pages to satisfy them all.
2614 */
2615 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2616 }
2617 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2618 /*
2619 * We don't expect to have any more waiters
2620 * after this, so let's wake them all up at
2621 * once.
2622 */
2623 thread_wakeup((event_t) &vm_page_free_count);
2624 } else for (; need_wakeup != 0; need_wakeup--) {
2625 /*
2626 * Wake up one waiter per page we just released.
2627 */
2628 thread_wakeup_one((event_t) &vm_page_free_count);
2629 }
2630
2631 VM_CHECK_MEMORYSTATUS;
2632 }
2633 }
2634 }
2635
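/*
 * Illustrative sketch, not part of the original file: how a caller such
 * as vm_pageout_scan() batches pages for vm_page_free_list(). The pages
 * are assumed to have already been taken off the paging queues and
 * unwired (e.g. via vm_page_free_prepare_queues()); they are chained
 * through pageq.next so the free-list lock is taken once per batch
 * rather than once per page.
 */
static void
example_free_page_batch(
	vm_page_t	*pages,
	int		count)
{
	vm_page_t	freeq = VM_PAGE_NULL;
	int		i;

	for (i = 0; i < count; i++) {
		pages[i]->pageq.next = (queue_entry_t) freeq;
		freeq = pages[i];
	}
	/* TRUE: also disassociate each page from its object and hash */
	vm_page_free_list(freeq, TRUE);
}
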
2636
2637 /*
2638 * vm_page_wire:
2639 *
2640 * Mark this page as wired down by yet
2641 * another map, removing it from paging queues
2642 * as necessary.
2643 *
2644 * The page's object and the page queues must be locked.
2645 */
2646 void
2647 vm_page_wire(
2648 register vm_page_t mem)
2649 {
2650
2651 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2652
2653 VM_PAGE_CHECK(mem);
2654 if (mem->object) {
2655 vm_object_lock_assert_exclusive(mem->object);
2656 } else {
2657 /*
2658 * In theory, the page should be in an object before it
2659 * gets wired, since we need to hold the object lock
2660 * to update some fields in the page structure.
2661 * However, some code (i386 pmap, for example) might want
2662 * to wire a page before it gets inserted into an object.
2663 * That's somewhat OK, as long as nobody else can get to
2664 * that page and update it at the same time.
2665 */
2666 }
2667 #if DEBUG
2668 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2669 #endif
2670 if ( !VM_PAGE_WIRED(mem)) {
2671
2672 if (mem->pageout_queue) {
2673 mem->pageout = FALSE;
2674 vm_pageout_throttle_up(mem);
2675 }
2676 VM_PAGE_QUEUES_REMOVE(mem);
2677
2678 if (mem->object) {
2679 mem->object->wired_page_count++;
2680 assert(mem->object->resident_page_count >=
2681 mem->object->wired_page_count);
2682 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2683 assert(vm_page_purgeable_count > 0);
2684 OSAddAtomic(-1, &vm_page_purgeable_count);
2685 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2686 }
2687 if (mem->object->all_reusable) {
2688 /*
2689 * Wired pages are not counted as "re-usable"
2690 * in "all_reusable" VM objects, so nothing
2691 * to do here.
2692 */
2693 } else if (mem->reusable) {
2694 /*
2695 * This page is not "re-usable" when it's
2696 * wired, so adjust its state and the
2697 * accounting.
2698 */
2699 vm_object_reuse_pages(mem->object,
2700 mem->offset,
2701 mem->offset+PAGE_SIZE_64,
2702 FALSE);
2703 }
2704 }
2705 assert(!mem->reusable);
2706
2707 if (!mem->private && !mem->fictitious && !mem->gobbled)
2708 vm_page_wire_count++;
2709 if (mem->gobbled)
2710 vm_page_gobble_count--;
2711 mem->gobbled = FALSE;
2712
2713 VM_CHECK_MEMORYSTATUS;
2714
2715 /*
2716 * ENCRYPTED SWAP:
2717 * The page could be encrypted, but
2718 * We don't have to decrypt it here
2719 * because we don't guarantee that the
2720 * data is actually valid at this point.
2721 * The page will get decrypted in
2722 * vm_fault_wire() if needed.
2723 */
2724 }
2725 assert(!mem->gobbled);
2726 mem->wire_count++;
2727 VM_PAGE_CHECK(mem);
2728 }
2729
2730 /*
2731 * vm_page_gobble:
2732 *
2733 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2734 *
2735 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2736 */
2737 void
2738 vm_page_gobble(
2739 register vm_page_t mem)
2740 {
2741 vm_page_lockspin_queues();
2742 VM_PAGE_CHECK(mem);
2743
2744 assert(!mem->gobbled);
2745 assert( !VM_PAGE_WIRED(mem));
2746
2747 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2748 if (!mem->private && !mem->fictitious)
2749 vm_page_wire_count++;
2750 }
2751 vm_page_gobble_count++;
2752 mem->gobbled = TRUE;
2753 vm_page_unlock_queues();
2754 }
2755
2756 /*
2757 * vm_page_unwire:
2758 *
2759 * Release one wiring of this page, potentially
2760 * enabling it to be paged again.
2761 *
2762 * The page's object and the page queues must be locked.
2763 */
2764 void
2765 vm_page_unwire(
2766 vm_page_t mem,
2767 boolean_t queueit)
2768 {
2769
2770 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2771
2772 VM_PAGE_CHECK(mem);
2773 assert(VM_PAGE_WIRED(mem));
2774 assert(mem->object != VM_OBJECT_NULL);
2775 #if DEBUG
2776 vm_object_lock_assert_exclusive(mem->object);
2777 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2778 #endif
2779 if (--mem->wire_count == 0) {
2780 assert(!mem->private && !mem->fictitious);
2781 vm_page_wire_count--;
2782 assert(mem->object->wired_page_count > 0);
2783 mem->object->wired_page_count--;
2784 assert(mem->object->resident_page_count >=
2785 mem->object->wired_page_count);
2786 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2787 OSAddAtomic(+1, &vm_page_purgeable_count);
2788 assert(vm_page_purgeable_wired_count > 0);
2789 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2790 }
2791 assert(!mem->laundry);
2792 assert(mem->object != kernel_object);
2793 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2794
2795 if (queueit == TRUE) {
2796 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2797 vm_page_deactivate(mem);
2798 } else {
2799 vm_page_activate(mem);
2800 }
2801 }
2802
2803 VM_CHECK_MEMORYSTATUS;
2804
2805 }
2806 VM_PAGE_CHECK(mem);
2807 }
2808
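/*
 * Illustrative sketch, not part of the original file: wire/unwire calls
 * are paired, and both require the page's object lock (exclusive) plus
 * the page queue lock, as a hypothetical caller might do around a short
 * section that needs the page to stay resident.
 */
static void
example_wire_briefly(vm_page_t m)
{
	vm_object_lock(m->object);

	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	/* ... operate on the wired page ... */

	vm_page_lockspin_queues();
	vm_page_unwire(m, TRUE);	/* TRUE: requeue onto active/inactive */
	vm_page_unlock_queues();

	vm_object_unlock(m->object);
}
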
2809 /*
2810 * vm_page_deactivate:
2811 *
2812 * Returns the given page to the inactive list,
2813 * indicating that no physical maps have access
2814 * to this page. [Used by the physical mapping system.]
2815 *
2816 * The page queues must be locked.
2817 */
2818 void
2819 vm_page_deactivate(
2820 vm_page_t m)
2821 {
2822 vm_page_deactivate_internal(m, TRUE);
2823 }
2824
2825
2826 void
2827 vm_page_deactivate_internal(
2828 vm_page_t m,
2829 boolean_t clear_hw_reference)
2830 {
2831
2832 VM_PAGE_CHECK(m);
2833 assert(m->object != kernel_object);
2834 assert(m->phys_page != vm_page_guard_addr);
2835
2836 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2837 #if DEBUG
2838 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2839 #endif
2840 /*
2841 * This page is no longer very interesting. If it was
2842 * interesting (active or inactive/referenced), then we
2843 * clear the reference bit and (re)enter it in the
2844 * inactive queue. Note wired pages should not have
2845 * their reference bit cleared.
2846 */
2847 assert ( !(m->absent && !m->unusual));
2848
2849 if (m->gobbled) { /* can this happen? */
2850 assert( !VM_PAGE_WIRED(m));
2851
2852 if (!m->private && !m->fictitious)
2853 vm_page_wire_count--;
2854 vm_page_gobble_count--;
2855 m->gobbled = FALSE;
2856 }
2857 /*
2858 * if this page is currently on the pageout queue, we can't do the
2859 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2860 * and we can't remove it manually since we would need the object lock
2861 * (which is not required here) to decrement the activity_in_progress
2862 * reference which is held on the object while the page is in the pageout queue...
2863 * just let the normal laundry processing proceed
2864 */
2865 if (m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
2866 return;
2867
2868 if (!m->absent && clear_hw_reference == TRUE)
2869 pmap_clear_reference(m->phys_page);
2870
2871 m->reference = FALSE;
2872 m->no_cache = FALSE;
2873
2874 if (!m->inactive) {
2875 VM_PAGE_QUEUES_REMOVE(m);
2876
2877 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2878 m->dirty && m->object->internal &&
2879 (m->object->purgable == VM_PURGABLE_DENY ||
2880 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2881 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2882 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2883 m->throttled = TRUE;
2884 vm_page_throttled_count++;
2885 } else {
2886 if (m->object->named && m->object->ref_count == 1) {
2887 vm_page_speculate(m, FALSE);
2888 #if DEVELOPMENT || DEBUG
2889 vm_page_speculative_recreated++;
2890 #endif
2891 } else {
2892 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2893 }
2894 }
2895 }
2896 }
2897
2898 /*
2899 * vm_page_enqueue_cleaned
2900 *
2901 * Put the page on the cleaned queue, mark it cleaned, etc.
2902 * Being on the cleaned queue (and having m->clean_queue set)
2903 * does ** NOT ** guarantee that the page is clean!
2904 *
2905 * Call with the queues lock held.
2906 */
2907
2908 void vm_page_enqueue_cleaned(vm_page_t m)
2909 {
2910 assert(m->phys_page != vm_page_guard_addr);
2911 #if DEBUG
2912 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2913 #endif
2914 assert( !(m->absent && !m->unusual));
2915
2916 if (m->gobbled) {
2917 assert( !VM_PAGE_WIRED(m));
2918 if (!m->private && !m->fictitious)
2919 vm_page_wire_count--;
2920 vm_page_gobble_count--;
2921 m->gobbled = FALSE;
2922 }
2923 /*
2924 * if this page is currently on the pageout queue, we can't do the
2925 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2926 * and we can't remove it manually since we would need the object lock
2927 * (which is not required here) to decrement the activity_in_progress
2928 * reference which is held on the object while the page is in the pageout queue...
2929 * just let the normal laundry processing proceed
2930 */
2931 if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
2932 return;
2933
2934 VM_PAGE_QUEUES_REMOVE(m);
2935
2936 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
2937 m->clean_queue = TRUE;
2938 vm_page_cleaned_count++;
2939
2940 m->inactive = TRUE;
2941 vm_page_inactive_count++;
2942 if (m->object->internal) {
2943 vm_page_pageable_internal_count++;
2944 } else {
2945 vm_page_pageable_external_count++;
2946 }
2947
2948 vm_pageout_enqueued_cleaned++;
2949 }
2950
2951 /*
2952 * vm_page_activate:
2953 *
2954 * Put the specified page on the active list (if appropriate).
2955 *
2956 * The page queues must be locked.
2957 */
2958
2959 #if CONFIG_JETSAM
2960 #if LATENCY_JETSAM
2961 extern struct vm_page jetsam_latency_page[NUM_OF_JETSAM_LATENCY_TOKENS];
2962 #endif /* LATENCY_JETSAM */
2963 #endif /* CONFIG_JETSAM */
2964
2965 void
2966 vm_page_activate(
2967 register vm_page_t m)
2968 {
2969 VM_PAGE_CHECK(m);
2970 #ifdef FIXME_4778297
2971 assert(m->object != kernel_object);
2972 #endif
2973 assert(m->phys_page != vm_page_guard_addr);
2974 #if DEBUG
2975 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2976 #endif
2977 assert( !(m->absent && !m->unusual));
2978
2979 if (m->gobbled) {
2980 assert( !VM_PAGE_WIRED(m));
2981 if (!m->private && !m->fictitious)
2982 vm_page_wire_count--;
2983 vm_page_gobble_count--;
2984 m->gobbled = FALSE;
2985 }
2986 /*
2987 * if this page is currently on the pageout queue, we can't do the
2988 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2989 * and we can't remove it manually since we would need the object lock
2990 * (which is not required here) to decrement the activity_in_progress
2991 * reference which is held on the object while the page is in the pageout queue...
2992 * just let the normal laundry processing proceed
2993 */
2994 if (m->pageout_queue || m->private || m->fictitious || m->compressor)
2995 return;
2996
2997 #if DEBUG
2998 if (m->active)
2999 panic("vm_page_activate: already active");
3000 #endif
3001
3002 if (m->speculative) {
3003 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
3004 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
3005 }
3006
3007 VM_PAGE_QUEUES_REMOVE(m);
3008
3009 if ( !VM_PAGE_WIRED(m)) {
3010
3011 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3012 m->dirty && m->object->internal &&
3013 (m->object->purgable == VM_PURGABLE_DENY ||
3014 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3015 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3016 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3017 m->throttled = TRUE;
3018 vm_page_throttled_count++;
3019 } else {
3020 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3021 m->active = TRUE;
3022 vm_page_active_count++;
3023 if (m->object->internal) {
3024 vm_page_pageable_internal_count++;
3025 } else {
3026 vm_page_pageable_external_count++;
3027 }
3028 #if LATENCY_JETSAM
3029 if (jlp_init) {
3030 uint64_t now = mach_absolute_time();
3031 uint64_t delta = now - jlp_time;
3032 clock_sec_t jl_secs = 0;
3033 clock_usec_t jl_usecs = 0;
3034 vm_page_t jlp;
3035
3036 absolutetime_to_microtime(delta, &jl_secs, &jl_usecs);
3037
3038 jl_usecs += jl_secs * USEC_PER_SEC;
3039 if (jl_usecs >= JETSAM_LATENCY_TOKEN_AGE) {
3040
3041 jlp = &jetsam_latency_page[jlp_current];
3042 if (jlp->active) {
3043 queue_remove(&vm_page_queue_active, jlp, vm_page_t, pageq);
3044 }
3045 queue_enter(&vm_page_queue_active, jlp, vm_page_t, pageq);
3046
3047 jlp->active = TRUE;
3048
3049 jlp->offset = now;
3050 jlp_time = jlp->offset;
3051
3052 if(++jlp_current == NUM_OF_JETSAM_LATENCY_TOKENS) {
3053 jlp_current = 0;
3054 }
3055
3056 }
3057 }
3058 #endif /* LATENCY_JETSAM */
3059 }
3060 m->reference = TRUE;
3061 m->no_cache = FALSE;
3062 }
3063 VM_PAGE_CHECK(m);
3064 }
3065
3066
3067 /*
3068 * vm_page_speculate:
3069 *
3070 * Put the specified page on the speculative list (if appropriate).
3071 *
3072 * The page queues must be locked.
3073 */
3074 void
3075 vm_page_speculate(
3076 vm_page_t m,
3077 boolean_t new)
3078 {
3079 struct vm_speculative_age_q *aq;
3080
3081 VM_PAGE_CHECK(m);
3082 assert(m->object != kernel_object);
3083 assert(m->phys_page != vm_page_guard_addr);
3084 #if DEBUG
3085 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3086 #endif
3087 assert( !(m->absent && !m->unusual));
3088
3089 /*
3090 * if this page is currently on the pageout queue, we can't do the
3091 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3092 * and we can't remove it manually since we would need the object lock
3093 * (which is not required here) to decrement the activity_in_progress
3094 * reference which is held on the object while the page is in the pageout queue...
3095 * just let the normal laundry processing proceed
3096 */
3097 if (m->pageout_queue || m->private || m->fictitious || m->compressor)
3098 return;
3099
3100 VM_PAGE_QUEUES_REMOVE(m);
3101
3102 if ( !VM_PAGE_WIRED(m)) {
3103 mach_timespec_t ts;
3104 clock_sec_t sec;
3105 clock_nsec_t nsec;
3106
3107 clock_get_system_nanotime(&sec, &nsec);
3108 ts.tv_sec = (unsigned int) sec;
3109 ts.tv_nsec = nsec;
3110
3111 if (vm_page_speculative_count == 0) {
3112
3113 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3114 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3115
3116 aq = &vm_page_queue_speculative[speculative_age_index];
3117
3118 /*
3119 * set the timer to begin a new group
3120 */
3121 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3122 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3123
3124 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3125 } else {
3126 aq = &vm_page_queue_speculative[speculative_age_index];
3127
3128 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3129
3130 speculative_age_index++;
3131
3132 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3133 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3134 if (speculative_age_index == speculative_steal_index) {
3135 speculative_steal_index = speculative_age_index + 1;
3136
3137 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3138 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3139 }
3140 aq = &vm_page_queue_speculative[speculative_age_index];
3141
3142 if (!queue_empty(&aq->age_q))
3143 vm_page_speculate_ageit(aq);
3144
3145 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3146 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3147
3148 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3149 }
3150 }
3151 enqueue_tail(&aq->age_q, &m->pageq);
3152 m->speculative = TRUE;
3153 vm_page_speculative_count++;
3154 if (m->object->internal) {
3155 vm_page_pageable_internal_count++;
3156 } else {
3157 vm_page_pageable_external_count++;
3158 }
3159
3160 if (new == TRUE) {
3161 vm_object_lock_assert_exclusive(m->object);
3162
3163 m->object->pages_created++;
3164 #if DEVELOPMENT || DEBUG
3165 vm_page_speculative_created++;
3166 #endif
3167 }
3168 }
3169 VM_PAGE_CHECK(m);
3170 }
3171
3172
3173 /*
3174 * move pages from the specified aging bin to
3175 * the speculative bin that pageout_scan claims from
3176 *
3177 * The page queues must be locked.
3178 */
3179 void
3180 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3181 {
3182 struct vm_speculative_age_q *sq;
3183 vm_page_t t;
3184
3185 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3186
3187 if (queue_empty(&sq->age_q)) {
3188 sq->age_q.next = aq->age_q.next;
3189 sq->age_q.prev = aq->age_q.prev;
3190
3191 t = (vm_page_t)sq->age_q.next;
3192 t->pageq.prev = &sq->age_q;
3193
3194 t = (vm_page_t)sq->age_q.prev;
3195 t->pageq.next = &sq->age_q;
3196 } else {
3197 t = (vm_page_t)sq->age_q.prev;
3198 t->pageq.next = aq->age_q.next;
3199
3200 t = (vm_page_t)aq->age_q.next;
3201 t->pageq.prev = sq->age_q.prev;
3202
3203 t = (vm_page_t)aq->age_q.prev;
3204 t->pageq.next = &sq->age_q;
3205
3206 sq->age_q.prev = aq->age_q.prev;
3207 }
3208 queue_init(&aq->age_q);
3209 }
3210
3211
3212 void
3213 vm_page_lru(
3214 vm_page_t m)
3215 {
3216 VM_PAGE_CHECK(m);
3217 assert(m->object != kernel_object);
3218 assert(m->phys_page != vm_page_guard_addr);
3219
3220 #if DEBUG
3221 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3222 #endif
3223 /*
3224 * if this page is currently on the pageout queue, we can't do the
3225 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3226 * and we can't remove it manually since we would need the object lock
3227 * (which is not required here) to decrement the activity_in_progress
3228 * reference which is held on the object while the page is in the pageout queue...
3229 * just let the normal laundry processing proceed
3230 */
3231 if (m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
3232 return;
3233
3234 m->no_cache = FALSE;
3235
3236 VM_PAGE_QUEUES_REMOVE(m);
3237
3238 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3239 }
3240
3241
3242 void
3243 vm_page_reactivate_all_throttled(void)
3244 {
3245 vm_page_t first_throttled, last_throttled;
3246 vm_page_t first_active;
3247 vm_page_t m;
3248 int extra_active_count;
3249 int extra_internal_count, extra_external_count;
3250
3251 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3252 return;
3253
3254 extra_active_count = 0;
3255 extra_internal_count = 0;
3256 extra_external_count = 0;
3257 vm_page_lock_queues();
3258 if (! queue_empty(&vm_page_queue_throttled)) {
3259 /*
3260 * Switch "throttled" pages to "active".
3261 */
3262 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3263 VM_PAGE_CHECK(m);
3264 assert(m->throttled);
3265 assert(!m->active);
3266 assert(!m->inactive);
3267 assert(!m->speculative);
3268 assert(!VM_PAGE_WIRED(m));
3269
3270 extra_active_count++;
3271 if (m->object->internal) {
3272 extra_internal_count++;
3273 } else {
3274 extra_external_count++;
3275 }
3276
3277 m->throttled = FALSE;
3278 m->active = TRUE;
3279 VM_PAGE_CHECK(m);
3280 }
3281
3282 /*
3283 * Transfer the entire throttled queue to the regular LRU page queues.
3284 * We insert it at the head of the active queue, so that these pages
3285 * get re-evaluated by the LRU algorithm first, since they've been
3286 * completely out of it until now.
3287 */
3288 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3289 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3290 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3291 if (queue_empty(&vm_page_queue_active)) {
3292 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3293 } else {
3294 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3295 }
3296 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3297 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3298 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3299
3300 #if DEBUG
3301 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3302 #endif
3303 queue_init(&vm_page_queue_throttled);
3304 /*
3305 * Adjust the global page counts.
3306 */
3307 vm_page_active_count += extra_active_count;
3308 vm_page_pageable_internal_count += extra_internal_count;
3309 vm_page_pageable_external_count += extra_external_count;
3310 vm_page_throttled_count = 0;
3311 }
3312 assert(vm_page_throttled_count == 0);
3313 assert(queue_empty(&vm_page_queue_throttled));
3314 vm_page_unlock_queues();
3315 }
3316
3317
3318 /*
3319 * move pages from the indicated local queue to the global active queue
3320 * it's ok to fail if we're below the hard limit and force == FALSE
3321 * the nolocks == TRUE case is to allow this function to be run on
3322 * the hibernate path
3323 */
3324
3325 void
3326 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3327 {
3328 struct vpl *lq;
3329 vm_page_t first_local, last_local;
3330 vm_page_t first_active;
3331 vm_page_t m;
3332 uint32_t count = 0;
3333
3334 if (vm_page_local_q == NULL)
3335 return;
3336
3337 lq = &vm_page_local_q[lid].vpl_un.vpl;
3338
3339 if (nolocks == FALSE) {
3340 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3341 if ( !vm_page_trylockspin_queues())
3342 return;
3343 } else
3344 vm_page_lockspin_queues();
3345
3346 VPL_LOCK(&lq->vpl_lock);
3347 }
3348 if (lq->vpl_count) {
3349 /*
3350 * Switch "local" pages to "active".
3351 */
3352 assert(!queue_empty(&lq->vpl_queue));
3353
3354 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3355 VM_PAGE_CHECK(m);
3356 assert(m->local);
3357 assert(!m->active);
3358 assert(!m->inactive);
3359 assert(!m->speculative);
3360 assert(!VM_PAGE_WIRED(m));
3361 assert(!m->throttled);
3362 assert(!m->fictitious);
3363
3364 if (m->local_id != lid)
3365 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3366
3367 m->local_id = 0;
3368 m->local = FALSE;
3369 m->active = TRUE;
3370 VM_PAGE_CHECK(m);
3371
3372 count++;
3373 }
3374 if (count != lq->vpl_count)
3375 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3376
3377 /*
3378 * Transfer the entire local queue to the regular LRU page queues.
3379 */
3380 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3381 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3382 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3383
3384 if (queue_empty(&vm_page_queue_active)) {
3385 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3386 } else {
3387 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3388 }
3389 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3390 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3391 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3392
3393 queue_init(&lq->vpl_queue);
3394 /*
3395 * Adjust the global page counts.
3396 */
3397 vm_page_active_count += lq->vpl_count;
3398 vm_page_pageable_internal_count += lq->vpl_internal_count;
3399 vm_page_pageable_external_count += lq->vpl_external_count;
3400 lq->vpl_count = 0;
3401 lq->vpl_internal_count = 0;
3402 lq->vpl_external_count = 0;
3403 }
3404 assert(queue_empty(&lq->vpl_queue));
3405
3406 if (nolocks == FALSE) {
3407 VPL_UNLOCK(&lq->vpl_lock);
3408 vm_page_unlock_queues();
3409 }
3410 }
3411
3412 /*
3413 * vm_page_part_zero_fill:
3414 *
3415 * Zero-fill a part of the page.
3416 */
3417 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3418 void
3419 vm_page_part_zero_fill(
3420 vm_page_t m,
3421 vm_offset_t m_pa,
3422 vm_size_t len)
3423 {
3424
3425 #if 0
3426 /*
3427 * we don't hold the page queue lock
3428 * so this check isn't safe to make
3429 */
3430 VM_PAGE_CHECK(m);
3431 #endif
3432
3433 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3434 pmap_zero_part_page(m->phys_page, m_pa, len);
3435 #else
3436 vm_page_t tmp;
3437 while (1) {
3438 tmp = vm_page_grab();
3439 if (tmp == VM_PAGE_NULL) {
3440 vm_page_wait(THREAD_UNINT);
3441 continue;
3442 }
3443 break;
3444 }
3445 vm_page_zero_fill(tmp);
3446 if(m_pa != 0) {
3447 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3448 }
3449 if((m_pa + len) < PAGE_SIZE) {
3450 vm_page_part_copy(m, m_pa + len, tmp,
3451 m_pa + len, PAGE_SIZE - (m_pa + len));
3452 }
3453 vm_page_copy(tmp,m);
3454 VM_PAGE_FREE(tmp);
3455 #endif
3456
3457 }
3458
3459 /*
3460 * vm_page_zero_fill:
3461 *
3462 * Zero-fill the specified page.
3463 */
3464 void
3465 vm_page_zero_fill(
3466 vm_page_t m)
3467 {
3468 XPR(XPR_VM_PAGE,
3469 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3470 m->object, m->offset, m, 0,0);
3471 #if 0
3472 /*
3473 * we don't hold the page queue lock
3474 * so this check isn't safe to make
3475 */
3476 VM_PAGE_CHECK(m);
3477 #endif
3478
3479 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3480 pmap_zero_page(m->phys_page);
3481 }
3482
3483 /*
3484 * vm_page_part_copy:
3485 *
3486 * copy part of one page to another
3487 */
3488
3489 void
3490 vm_page_part_copy(
3491 vm_page_t src_m,
3492 vm_offset_t src_pa,
3493 vm_page_t dst_m,
3494 vm_offset_t dst_pa,
3495 vm_size_t len)
3496 {
3497 #if 0
3498 /*
3499 * we don't hold the page queue lock
3500 * so this check isn't safe to make
3501 */
3502 VM_PAGE_CHECK(src_m);
3503 VM_PAGE_CHECK(dst_m);
3504 #endif
3505 pmap_copy_part_page(src_m->phys_page, src_pa,
3506 dst_m->phys_page, dst_pa, len);
3507 }
3508
3509 /*
3510 * vm_page_copy:
3511 *
3512 * Copy one page to another
3513 *
3514 * ENCRYPTED SWAP:
3515 * The source page should not be encrypted. The caller should
3516 * make sure the page is decrypted first, if necessary.
3517 */
3518
3519 int vm_page_copy_cs_validations = 0;
3520 int vm_page_copy_cs_tainted = 0;
3521
3522 void
3523 vm_page_copy(
3524 vm_page_t src_m,
3525 vm_page_t dest_m)
3526 {
3527 XPR(XPR_VM_PAGE,
3528 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3529 src_m->object, src_m->offset,
3530 dest_m->object, dest_m->offset,
3531 0);
3532 #if 0
3533 /*
3534 * we don't hold the page queue lock
3535 * so this check isn't safe to make
3536 */
3537 VM_PAGE_CHECK(src_m);
3538 VM_PAGE_CHECK(dest_m);
3539 #endif
3540 vm_object_lock_assert_held(src_m->object);
3541
3542 /*
3543 * ENCRYPTED SWAP:
3544 * The source page should not be encrypted at this point.
3545 * The destination page will therefore not contain encrypted
3546 * data after the copy.
3547 */
3548 if (src_m->encrypted) {
3549 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3550 }
3551 dest_m->encrypted = FALSE;
3552
3553 if (src_m->object != VM_OBJECT_NULL &&
3554 src_m->object->code_signed) {
3555 /*
3556 * We're copying a page from a code-signed object.
3557 * Whoever ends up mapping the copy page might care about
3558 * the original page's integrity, so let's validate the
3559 * source page now.
3560 */
3561 vm_page_copy_cs_validations++;
3562 vm_page_validate_cs(src_m);
3563 }
3564
3565 if (vm_page_is_slideable(src_m)) {
3566 boolean_t was_busy = src_m->busy;
3567 src_m->busy = TRUE;
3568 (void) vm_page_slide(src_m, 0);
3569 assert(src_m->busy);
3570 if (!was_busy) {
3571 PAGE_WAKEUP_DONE(src_m);
3572 }
3573 }
3574
3575 /*
3576 * Propagate the cs_tainted bit to the copy page. Do not propagate
3577 * the cs_validated bit.
3578 */
3579 dest_m->cs_tainted = src_m->cs_tainted;
3580 if (dest_m->cs_tainted) {
3581 vm_page_copy_cs_tainted++;
3582 }
3583 dest_m->slid = src_m->slid;
3584 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3585 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3586 }
3587
3588 #if MACH_ASSERT
3589 static void
3590 _vm_page_print(
3591 vm_page_t p)
3592 {
3593 printf("vm_page %p: \n", p);
3594 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3595 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3596 printf(" next=%p\n", p->next);
3597 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3598 printf(" wire_count=%u\n", p->wire_count);
3599
3600 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3601 (p->local ? "" : "!"),
3602 (p->inactive ? "" : "!"),
3603 (p->active ? "" : "!"),
3604 (p->pageout_queue ? "" : "!"),
3605 (p->speculative ? "" : "!"),
3606 (p->laundry ? "" : "!"));
3607 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3608 (p->free ? "" : "!"),
3609 (p->reference ? "" : "!"),
3610 (p->gobbled ? "" : "!"),
3611 (p->private ? "" : "!"),
3612 (p->throttled ? "" : "!"));
3613 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3614 (p->busy ? "" : "!"),
3615 (p->wanted ? "" : "!"),
3616 (p->tabled ? "" : "!"),
3617 (p->fictitious ? "" : "!"),
3618 (p->pmapped ? "" : "!"),
3619 (p->wpmapped ? "" : "!"));
3620 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3621 (p->pageout ? "" : "!"),
3622 (p->absent ? "" : "!"),
3623 (p->error ? "" : "!"),
3624 (p->dirty ? "" : "!"),
3625 (p->cleaning ? "" : "!"),
3626 (p->precious ? "" : "!"),
3627 (p->clustered ? "" : "!"));
3628 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3629 (p->overwriting ? "" : "!"),
3630 (p->restart ? "" : "!"),
3631 (p->unusual ? "" : "!"),
3632 (p->encrypted ? "" : "!"),
3633 (p->encrypted_cleaning ? "" : "!"));
3634 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
3635 (p->cs_validated ? "" : "!"),
3636 (p->cs_tainted ? "" : "!"),
3637 (p->no_cache ? "" : "!"));
3638
3639 printf("phys_page=0x%x\n", p->phys_page);
3640 }
3641
3642 /*
3643 * Check that the list of pages is ordered by
3644 * ascending physical address and has no holes.
3645 */
3646 static int
3647 vm_page_verify_contiguous(
3648 vm_page_t pages,
3649 unsigned int npages)
3650 {
3651 register vm_page_t m;
3652 unsigned int page_count;
3653 vm_offset_t prev_addr;
3654
3655 prev_addr = pages->phys_page;
3656 page_count = 1;
3657 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3658 if (m->phys_page != prev_addr + 1) {
3659 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3660 m, (long)prev_addr, m->phys_page);
3661 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3662 panic("vm_page_verify_contiguous: not contiguous!");
3663 }
3664 prev_addr = m->phys_page;
3665 ++page_count;
3666 }
3667 if (page_count != npages) {
3668 printf("pages %p actual count 0x%x but requested 0x%x\n",
3669 pages, page_count, npages);
3670 panic("vm_page_verify_contiguous: count error");
3671 }
3672 return 1;
3673 }
3674
3675
3676 /*
3677 * Check the free lists for proper length etc.
3678 */
3679 static unsigned int
3680 vm_page_verify_free_list(
3681 queue_head_t *vm_page_queue,
3682 unsigned int color,
3683 vm_page_t look_for_page,
3684 boolean_t expect_page)
3685 {
3686 unsigned int npages;
3687 vm_page_t m;
3688 vm_page_t prev_m;
3689 boolean_t found_page;
3690
3691 found_page = FALSE;
3692 npages = 0;
3693 prev_m = (vm_page_t) vm_page_queue;
3694 queue_iterate(vm_page_queue,
3695 m,
3696 vm_page_t,
3697 pageq) {
3698
3699 if (m == look_for_page) {
3700 found_page = TRUE;
3701 }
3702 if ((vm_page_t) m->pageq.prev != prev_m)
3703 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3704 color, npages, m, m->pageq.prev, prev_m);
3705 if ( ! m->busy )
3706 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3707 color, npages, m);
3708 if (color != (unsigned int) -1) {
3709 if ((m->phys_page & vm_color_mask) != color)
3710 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3711 color, npages, m, m->phys_page & vm_color_mask, color);
3712 if ( ! m->free )
3713 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3714 color, npages, m);
3715 }
3716 ++npages;
3717 prev_m = m;
3718 }
3719 if (look_for_page != VM_PAGE_NULL) {
3720 unsigned int other_color;
3721
3722 if (expect_page && !found_page) {
3723 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3724 color, npages, look_for_page, look_for_page->phys_page);
3725 _vm_page_print(look_for_page);
3726 for (other_color = 0;
3727 other_color < vm_colors;
3728 other_color++) {
3729 if (other_color == color)
3730 continue;
3731 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3732 other_color, look_for_page, FALSE);
3733 }
3734 if (color == (unsigned int) -1) {
3735 vm_page_verify_free_list(&vm_lopage_queue_free,
3736 (unsigned int) -1, look_for_page, FALSE);
3737 }
3738 panic("vm_page_verify_free_list(color=%u)\n", color);
3739 }
3740 if (!expect_page && found_page) {
3741 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3742 color, npages, look_for_page, look_for_page->phys_page);
3743 }
3744 }
3745 return npages;
3746 }
3747
3748 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3749 static void
3750 vm_page_verify_free_lists( void )
3751 {
3752 unsigned int color, npages, nlopages;
3753
3754 if (! vm_page_verify_free_lists_enabled)
3755 return;
3756
3757 npages = 0;
3758
3759 lck_mtx_lock(&vm_page_queue_free_lock);
3760
3761 for( color = 0; color < vm_colors; color++ ) {
3762 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3763 color, VM_PAGE_NULL, FALSE);
3764 }
3765 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3766 (unsigned int) -1,
3767 VM_PAGE_NULL, FALSE);
3768 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3769 panic("vm_page_verify_free_lists: "
3770 "npages %u free_count %d nlopages %u lo_free_count %u",
3771 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3772
3773 lck_mtx_unlock(&vm_page_queue_free_lock);
3774 }
3775
3776 void
3777 vm_page_queues_assert(
3778 vm_page_t mem,
3779 int val)
3780 {
3781 #if DEBUG
3782 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3783 #endif
3784 if (mem->free + mem->active + mem->inactive + mem->speculative +
3785 mem->throttled + mem->pageout_queue > (val)) {
3786 _vm_page_print(mem);
3787 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3788 }
3789 if (VM_PAGE_WIRED(mem)) {
3790 assert(!mem->active);
3791 assert(!mem->inactive);
3792 assert(!mem->speculative);
3793 assert(!mem->throttled);
3794 assert(!mem->pageout_queue);
3795 }
3796 }
3797 #endif /* MACH_ASSERT */
3798
3799
3800 /*
3801 * CONTIGUOUS PAGE ALLOCATION
3802 *
3803 * Find a region large enough to contain at least n pages
3804 * of contiguous physical memory.
3805 *
3806 * This is done by traversing the vm_page_t array in a linear fashion...
3807 * we assume that the vm_page_t array has the available physical pages in an
3808 * ordered, ascending list... this is currently true of all our implementations
3809 * and must remain so... there can be 'holes' in the array... we also can
3810 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3811 * which used to happen via 'vm_page_convert'... that function was no longer
3812 * being called and was removed...
3813 *
3814 * The basic flow consists of stabilizing some of the interesting state of
3815 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3816 * sweep at the beginning of the array looking for pages that meet our criteria
3817 * for a 'stealable' page... currently we are pretty conservative... if the page
3818 * meets these criteria and is physically contiguous to the previous page in the 'run'
3819 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3820 * and start to develop a new run... if at this point we've already considered
3821 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3822 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3823 * to other threads trying to acquire free pages (or move pages from q to q),
3824 * and then continue from the spot we left off... we only make 1 pass through the
3825 * array. Once we have a 'run' that is long enough, we'll go into the loop which
3826 * which steals the pages from the queues they're currently on... pages on the free
3827 * queue can be stolen directly... pages that are on any of the other queues
3828 * must be removed from the object they are tabled on... this requires taking the
3829 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3830 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3831 * dump the pages we've currently stolen back to the free list, and pick up our
3832 * scan from the point where we aborted the 'current' run.
3833 *
3834 *
3835 * Requirements:
3836 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3837 *
3838 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3839 *
3840 * Algorithm: described in the flow above.
3841 */
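
/*
 * Editor's note: the sketch below is an illustrative, hypothetical rendering
 * (not part of the original source or the kernel build) of the run-building
 * rule described above: a run may only start on a page that satisfies the
 * alignment mask, and it only grows while each page is physically adjacent
 * to the previous one.  Locking, yielding and the page-stealing pass are
 * intentionally omitted; see vm_page_find_contiguous() below for the real
 * thing.
 */
#if 0	/* illustrative sketch only */
static unsigned int
find_contiguous_run(const unsigned int *phys, unsigned int count,
		    unsigned int contig_pages, unsigned int pnum_mask,
		    unsigned int *run_start)
{
	unsigned int	idx;
	unsigned int	npages = 0;
	unsigned int	prevcontaddr = (unsigned int) -2;

	for (idx = 0; idx < count && npages < contig_pages; idx++) {
		if (phys[idx] != prevcontaddr + 1) {
			/* not adjacent to the previous page... a new run can
			 * only start on a properly aligned page */
			if ((phys[idx] & pnum_mask) != 0) {
				npages = 0;
			} else {
				npages = 1;
				*run_start = idx;
			}
		} else {
			npages++;
		}
		prevcontaddr = phys[idx];
	}
	return (npages);	/* == contig_pages on success */
}
#endif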
3842
3843 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3844
3845
3846 #define RESET_STATE_OF_RUN() \
3847 MACRO_BEGIN \
3848 prevcontaddr = -2; \
3849 start_pnum = -1; \
3850 free_considered = 0; \
3851 substitute_needed = 0; \
3852 npages = 0; \
3853 MACRO_END
3854
3855 /*
3856 * Can we steal in-use (i.e. not free) pages when searching for
3857 * physically-contiguous pages ?
3858 */
3859 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3860
3861 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3862 #if DEBUG
3863 int vm_page_find_contig_debug = 0;
3864 #endif
3865
3866 static vm_page_t
3867 vm_page_find_contiguous(
3868 unsigned int contig_pages,
3869 ppnum_t max_pnum,
3870 ppnum_t pnum_mask,
3871 boolean_t wire,
3872 int flags)
3873 {
3874 vm_page_t m = NULL;
3875 ppnum_t prevcontaddr;
3876 ppnum_t start_pnum;
3877 unsigned int npages, considered, scanned;
3878 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3879 unsigned int idx_last_contig_page_found = 0;
3880 int free_considered, free_available;
3881 int substitute_needed;
3882 boolean_t wrapped;
3883 #if DEBUG
3884 clock_sec_t tv_start_sec, tv_end_sec;
3885 clock_usec_t tv_start_usec, tv_end_usec;
3886 #endif
3887 #if MACH_ASSERT
3888 int yielded = 0;
3889 int dumped_run = 0;
3890 int stolen_pages = 0;
3891 int compressed_pages = 0;
3892 #endif
3893
3894 if (contig_pages == 0)
3895 return VM_PAGE_NULL;
3896
3897 #if MACH_ASSERT
3898 vm_page_verify_free_lists();
3899 #endif
3900 #if DEBUG
3901 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3902 #endif
3903 PAGE_REPLACEMENT_ALLOWED(TRUE);
3904
3905 vm_page_lock_queues();
3906 lck_mtx_lock(&vm_page_queue_free_lock);
3907
3908 RESET_STATE_OF_RUN();
3909
3910 scanned = 0;
3911 considered = 0;
3912 free_available = vm_page_free_count - vm_page_free_reserved;
3913
3914 wrapped = FALSE;
3915
3916 if(flags & KMA_LOMEM)
3917 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3918 else
3919 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3920
3921 orig_last_idx = idx_last_contig_page_found;
3922 last_idx = orig_last_idx;
3923
3924 for (page_idx = last_idx, start_idx = last_idx;
3925 npages < contig_pages && page_idx < vm_pages_count;
3926 page_idx++) {
3927 retry:
3928 if (wrapped &&
3929 npages == 0 &&
3930 page_idx >= orig_last_idx) {
3931 /*
3932 * We're back where we started and we haven't
3933 * found any suitable contiguous range. Let's
3934 * give up.
3935 */
3936 break;
3937 }
3938 scanned++;
3939 m = &vm_pages[page_idx];
3940
3941 assert(!m->fictitious);
3942 assert(!m->private);
3943
3944 if (max_pnum && m->phys_page > max_pnum) {
3945 /* no more low pages... */
3946 break;
3947 }
3948 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3949 /*
3950 * not aligned
3951 */
3952 RESET_STATE_OF_RUN();
3953
3954 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3955 m->encrypted_cleaning ||
3956 m->pageout_queue || m->laundry || m->wanted ||
3957 m->cleaning || m->overwriting || m->pageout) {
3958 /*
3959 * page is in a transient state
3960 * or a state we don't want to deal
3961 * with, so don't consider it which
3962 * means starting a new run
3963 */
3964 RESET_STATE_OF_RUN();
3965
3966 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
3967 /*
3968 * page needs to be on one of our queues
3969 * or it needs to belong to the compressor pool
3970 * in order for it to be stable behind the
3971 * locks we hold at this point...
3972 * if not, don't consider it which
3973 * means starting a new run
3974 */
3975 RESET_STATE_OF_RUN();
3976
3977 } else if (!m->free && (!m->tabled || m->busy)) {
3978 /*
3979 * pages on the free list are always 'busy'
3980 * so we couldn't test for 'busy' in the check
3981 * for the transient states... pages that are
3982 * 'free' are never 'tabled', so we also couldn't
3983 * test for 'tabled'. So we check here to make
3984 * sure that a non-free page is not busy and is
3985 * tabled on an object...
3986 * if not, don't consider it which
3987 * means starting a new run
3988 */
3989 RESET_STATE_OF_RUN();
3990
3991 } else {
3992 if (m->phys_page != prevcontaddr + 1) {
3993 if ((m->phys_page & pnum_mask) != 0) {
3994 RESET_STATE_OF_RUN();
3995 goto did_consider;
3996 } else {
3997 npages = 1;
3998 start_idx = page_idx;
3999 start_pnum = m->phys_page;
4000 }
4001 } else {
4002 npages++;
4003 }
4004 prevcontaddr = m->phys_page;
4005
4006 VM_PAGE_CHECK(m);
4007 if (m->free) {
4008 free_considered++;
4009 } else {
4010 /*
4011 * This page is not free.
4012 * If we can't steal used pages,
4013 * we have to give up this run
4014 * and keep looking.
4015 * Otherwise, we might need to
4016 * move the contents of this page
4017 * into a substitute page.
4018 */
4019 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4020 if (m->pmapped || m->dirty || m->precious) {
4021 substitute_needed++;
4022 }
4023 #else
4024 RESET_STATE_OF_RUN();
4025 #endif
4026 }
4027
4028 if ((free_considered + substitute_needed) > free_available) {
4029 /*
4030 * if we let this run continue
4031 * we will end up dropping the vm_page_free_count
4032 * below the reserve limit... we need to abort
4033 * this run, but we can at least re-consider this
4034 * page... thus the jump back to 'retry'
4035 */
4036 RESET_STATE_OF_RUN();
4037
4038 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4039 considered++;
4040 goto retry;
4041 }
4042 /*
4043 * free_available == 0
4044 * so can't consider any free pages... if
4045 * we went to retry in this case, we'd
4046 * get stuck looking at the same page
4047 * w/o making any forward progress...
4048 * we also want to take this path if we've already
4049 * reached our limit that controls the lock latency
4050 */
4051 }
4052 }
4053 did_consider:
4054 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
4055
4056 PAGE_REPLACEMENT_ALLOWED(FALSE);
4057
4058 lck_mtx_unlock(&vm_page_queue_free_lock);
4059 vm_page_unlock_queues();
4060
4061 mutex_pause(0);
4062
4063 PAGE_REPLACEMENT_ALLOWED(TRUE);
4064
4065 vm_page_lock_queues();
4066 lck_mtx_lock(&vm_page_queue_free_lock);
4067
4068 RESET_STATE_OF_RUN();
4069 /*
4070 * reset our free page limit since we
4071 * dropped the lock protecting the vm_page_free_queue
4072 */
4073 free_available = vm_page_free_count - vm_page_free_reserved;
4074 considered = 0;
4075 #if MACH_ASSERT
4076 yielded++;
4077 #endif
4078 goto retry;
4079 }
4080 considered++;
4081 }
4082 m = VM_PAGE_NULL;
4083
4084 if (npages != contig_pages) {
4085 if (!wrapped) {
4086 /*
4087 * We didn't find a contiguous range but we didn't
4088 * start from the very first page.
4089 * Start again from the very first page.
4090 */
4091 RESET_STATE_OF_RUN();
4092 if( flags & KMA_LOMEM)
4093 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4094 else
4095 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4096 last_idx = 0;
4097 page_idx = last_idx;
4098 wrapped = TRUE;
4099 goto retry;
4100 }
4101 lck_mtx_unlock(&vm_page_queue_free_lock);
4102 } else {
4103 vm_page_t m1;
4104 vm_page_t m2;
4105 unsigned int cur_idx;
4106 unsigned int tmp_start_idx;
4107 vm_object_t locked_object = VM_OBJECT_NULL;
4108 boolean_t abort_run = FALSE;
4109
4110 assert(page_idx - start_idx == contig_pages);
4111
4112 tmp_start_idx = start_idx;
4113
4114 /*
4115 * first pass through to pull the free pages
4116 * off of the free queue so that in case we
4117 * need substitute pages, we won't grab any
4118 * of the free pages in the run... we'll clear
4119 * the 'free' bit in the 2nd pass, and even in
4120 * an abort_run case, we'll collect all of the
4121 * free pages in this run and return them to the free list
4122 */
4123 while (start_idx < page_idx) {
4124
4125 m1 = &vm_pages[start_idx++];
4126
4127 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4128 assert(m1->free);
4129 #endif
4130
4131 if (m1->free) {
4132 unsigned int color;
4133
4134 color = m1->phys_page & vm_color_mask;
4135 #if MACH_ASSERT
4136 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
4137 #endif
4138 queue_remove(&vm_page_queue_free[color],
4139 m1,
4140 vm_page_t,
4141 pageq);
4142 m1->pageq.next = NULL;
4143 m1->pageq.prev = NULL;
4144 #if MACH_ASSERT
4145 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
4146 #endif
4147 /*
4148 * Clear the "free" bit so that this page
4149 * does not get considered for another
4150 * concurrent physically-contiguous allocation.
4151 */
4152 m1->free = FALSE;
4153 assert(m1->busy);
4154
4155 vm_page_free_count--;
4156 }
4157 }
4158 /*
4159 * adjust global freelist counts
4160 */
4161 if (vm_page_free_count < vm_page_free_count_minimum)
4162 vm_page_free_count_minimum = vm_page_free_count;
4163
4164 if( flags & KMA_LOMEM)
4165 vm_page_lomem_find_contiguous_last_idx = page_idx;
4166 else
4167 vm_page_find_contiguous_last_idx = page_idx;
4168
4169 /*
4170 * we can drop the free queue lock at this point since
4171 * we've pulled any 'free' candidates off of the list
4172 * we need it dropped so that we can do a vm_page_grab
4173 * when substituting for pmapped/dirty pages
4174 */
4175 lck_mtx_unlock(&vm_page_queue_free_lock);
4176
4177 start_idx = tmp_start_idx;
4178 cur_idx = page_idx - 1;
4179
4180 while (start_idx++ < page_idx) {
4181 /*
4182 * must go through the list from back to front
4183 * so that the page list is created in the
4184 * correct order - low -> high phys addresses
4185 */
4186 m1 = &vm_pages[cur_idx--];
4187
4188 assert(!m1->free);
4189
4190 if (m1->object == VM_OBJECT_NULL) {
4191 /*
4192 * page has already been removed from
4193 * the free list in the 1st pass
4194 */
4195 assert(m1->offset == (vm_object_offset_t) -1);
4196 assert(m1->busy);
4197 assert(!m1->wanted);
4198 assert(!m1->laundry);
4199 } else {
4200 vm_object_t object;
4201 int refmod;
4202 boolean_t disconnected, reusable;
4203
4204 if (abort_run == TRUE)
4205 continue;
4206
4207 object = m1->object;
4208
4209 if (object != locked_object) {
4210 if (locked_object) {
4211 vm_object_unlock(locked_object);
4212 locked_object = VM_OBJECT_NULL;
4213 }
4214 if (vm_object_lock_try(object))
4215 locked_object = object;
4216 }
4217 if (locked_object == VM_OBJECT_NULL ||
4218 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4219 m1->encrypted_cleaning ||
4220 m1->pageout_queue || m1->laundry || m1->wanted ||
4221 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
4222
4223 if (locked_object) {
4224 vm_object_unlock(locked_object);
4225 locked_object = VM_OBJECT_NULL;
4226 }
4227 tmp_start_idx = cur_idx;
4228 abort_run = TRUE;
4229 continue;
4230 }
4231
4232 disconnected = FALSE;
4233 reusable = FALSE;
4234
4235 if ((m1->reusable ||
4236 m1->object->all_reusable) &&
4237 m1->inactive &&
4238 !m1->dirty &&
4239 !m1->reference) {
4240 /* reusable page... */
4241 refmod = pmap_disconnect(m1->phys_page);
4242 disconnected = TRUE;
4243 if (refmod == 0) {
4244 /*
4245 * ... not reused: can steal
4246 * without relocating contents.
4247 */
4248 reusable = TRUE;
4249 }
4250 }
4251
4252 if ((m1->pmapped &&
4253 ! reusable) ||
4254 m1->dirty ||
4255 m1->precious) {
4256 vm_object_offset_t offset;
4257
4258 m2 = vm_page_grab();
4259
4260 if (m2 == VM_PAGE_NULL) {
4261 if (locked_object) {
4262 vm_object_unlock(locked_object);
4263 locked_object = VM_OBJECT_NULL;
4264 }
4265 tmp_start_idx = cur_idx;
4266 abort_run = TRUE;
4267 continue;
4268 }
4269 if (! disconnected) {
4270 if (m1->pmapped)
4271 refmod = pmap_disconnect(m1->phys_page);
4272 else
4273 refmod = 0;
4274 }
4275
4276 /* copy the page's contents */
4277 pmap_copy_page(m1->phys_page, m2->phys_page);
4278 /* copy the page's state */
4279 assert(!VM_PAGE_WIRED(m1));
4280 assert(!m1->free);
4281 assert(!m1->pageout_queue);
4282 assert(!m1->laundry);
4283 m2->reference = m1->reference;
4284 assert(!m1->gobbled);
4285 assert(!m1->private);
4286 m2->no_cache = m1->no_cache;
4287 m2->xpmapped = m1->xpmapped;
4288 assert(!m1->busy);
4289 assert(!m1->wanted);
4290 assert(!m1->fictitious);
4291 m2->pmapped = m1->pmapped; /* should flush cache ? */
4292 m2->wpmapped = m1->wpmapped;
4293 assert(!m1->pageout);
4294 m2->absent = m1->absent;
4295 m2->error = m1->error;
4296 m2->dirty = m1->dirty;
4297 assert(!m1->cleaning);
4298 m2->precious = m1->precious;
4299 m2->clustered = m1->clustered;
4300 assert(!m1->overwriting);
4301 m2->restart = m1->restart;
4302 m2->unusual = m1->unusual;
4303 m2->encrypted = m1->encrypted;
4304 assert(!m1->encrypted_cleaning);
4305 m2->cs_validated = m1->cs_validated;
4306 m2->cs_tainted = m1->cs_tainted;
4307
4308 /*
4309 * If m1 had really been reusable,
4310 * we would have just stolen it, so
4311 * let's not propagate its "reusable"
4312 * bit and assert that m2 is not
4313 * marked as "reusable".
4314 */
4315 // m2->reusable = m1->reusable;
4316 assert(!m2->reusable);
4317
4318 assert(!m1->lopage);
4319 m2->slid = m1->slid;
4320 m2->was_dirty = m1->was_dirty;
4321 m2->compressor = m1->compressor;
4322
4323 /*
4324 * page may need to be flushed if
4325 * it is marshalled into a UPL
4326 * that is going to be used by a device
4327 * that doesn't support coherency
4328 */
4329 m2->written_by_kernel = TRUE;
4330
4331 /*
4332 * make sure we clear the ref/mod state
4333 * from the pmap layer... else we risk
4334 * inheriting state from the last time
4335 * this page was used...
4336 */
4337 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4338
4339 if (refmod & VM_MEM_REFERENCED)
4340 m2->reference = TRUE;
4341 if (refmod & VM_MEM_MODIFIED) {
4342 SET_PAGE_DIRTY(m2, TRUE);
4343 }
4344 offset = m1->offset;
4345
4346 /*
4347 * completely cleans up the state
4348 * of the page so that it is ready
4349 * to be put onto the free list... for
4350 * our purposes it now looks as if it
4351 * just came off of the free list
4352 */
4353 vm_page_free_prepare(m1);
4354
4355 /*
4356 * now put the substitute page
4357 * on the object
4358 */
4359 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4360
4361 if (m2->compressor) {
4362 m2->pmapped = TRUE;
4363 m2->wpmapped = TRUE;
4364
4365 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4366 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4367 #if MACH_ASSERT
4368 compressed_pages++;
4369 #endif
4370 } else {
4371 if (m2->reference)
4372 vm_page_activate(m2);
4373 else
4374 vm_page_deactivate(m2);
4375 }
4376 PAGE_WAKEUP_DONE(m2);
4377
4378 } else {
4379 assert(!m1->compressor);
4380
4381 /*
4382 * completely cleans up the state
4383 * of the page so that it is ready
4384 * to be put onto the free list... for
4385 * our purposes it now looks as if it
4386 * just came off of the free list
4387 */
4388 vm_page_free_prepare(m1);
4389 }
4390 #if MACH_ASSERT
4391 stolen_pages++;
4392 #endif
4393 }
4394 m1->pageq.next = (queue_entry_t) m;
4395 m1->pageq.prev = NULL;
4396 m = m1;
4397 }
4398 if (locked_object) {
4399 vm_object_unlock(locked_object);
4400 locked_object = VM_OBJECT_NULL;
4401 }
4402
4403 if (abort_run == TRUE) {
4404 if (m != VM_PAGE_NULL) {
4405 vm_page_free_list(m, FALSE);
4406 }
4407 #if MACH_ASSERT
4408 dumped_run++;
4409 #endif
4410 /*
4411 * tmp_start_idx points one below the page
4412 * that caused the abort (because of the
4413 * auto-decrement on use), so add 1 to get
4414 * back to that page and 1 more to step
4415 * past it before resuming the scan
4416 */
4417 page_idx = tmp_start_idx + 2;
4418 if (page_idx >= vm_pages_count) {
4419 if (wrapped)
4420 goto done_scanning;
4421 page_idx = last_idx = 0;
4422 wrapped = TRUE;
4423 }
4424 abort_run = FALSE;
4425
4426 /*
4427 * Reset the scan state and resume just past the
4428 * page that caused the abort (or from the very
4429 * first page if we just wrapped around).
4430 */
4431 RESET_STATE_OF_RUN();
4432
4433 if( flags & KMA_LOMEM)
4434 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4435 else
4436 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4437
4438 last_idx = page_idx;
4439
4440 lck_mtx_lock(&vm_page_queue_free_lock);
4441 /*
4442 * reset our free page limit since we
4443 * dropped the lock protecting the vm_page_free_queue
4444 */
4445 free_available = vm_page_free_count - vm_page_free_reserved;
4446 goto retry;
4447 }
4448
4449 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4450
4451 if (wire == TRUE)
4452 m1->wire_count++;
4453 else
4454 m1->gobbled = TRUE;
4455 }
4456 if (wire == FALSE)
4457 vm_page_gobble_count += npages;
4458
4459 /*
4460 * gobbled pages are also counted as wired pages
4461 */
4462 vm_page_wire_count += npages;
4463
4464 assert(vm_page_verify_contiguous(m, npages));
4465 }
4466 done_scanning:
4467 PAGE_REPLACEMENT_ALLOWED(FALSE);
4468
4469 vm_page_unlock_queues();
4470
4471 #if DEBUG
4472 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4473
4474 tv_end_sec -= tv_start_sec;
4475 if (tv_end_usec < tv_start_usec) {
4476 tv_end_sec--;
4477 tv_end_usec += 1000000;
4478 }
4479 tv_end_usec -= tv_start_usec;
4480 if (tv_end_usec >= 1000000) {
4481 tv_end_sec++;
4482 tv_end_usec -= 1000000;
4483 }
4484 if (vm_page_find_contig_debug) {
4485 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4486 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4487 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4488 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
4489 }
4490
4491 #endif
4492 #if MACH_ASSERT
4493 vm_page_verify_free_lists();
4494 #endif
4495 return m;
4496 }
4497
4498 /*
4499 * Allocate a list of contiguous, wired pages.
4500 */
4501 kern_return_t
4502 cpm_allocate(
4503 vm_size_t size,
4504 vm_page_t *list,
4505 ppnum_t max_pnum,
4506 ppnum_t pnum_mask,
4507 boolean_t wire,
4508 int flags)
4509 {
4510 vm_page_t pages;
4511 unsigned int npages;
4512
4513 if (size % PAGE_SIZE != 0)
4514 return KERN_INVALID_ARGUMENT;
4515
4516 npages = (unsigned int) (size / PAGE_SIZE);
4517 if (npages != size / PAGE_SIZE) {
4518 /* 32-bit overflow */
4519 return KERN_INVALID_ARGUMENT;
4520 }
4521
4522 /*
4523 * Obtain a pointer to a subset of the free
4524 * list large enough to satisfy the request;
4525 * the region will be physically contiguous.
4526 */
4527 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4528
4529 if (pages == VM_PAGE_NULL)
4530 return KERN_NO_SPACE;
4531 /*
4532 * determine need for wakeups
4533 */
4534 if ((vm_page_free_count < vm_page_free_min) ||
4535 ((vm_page_free_count < vm_page_free_target) &&
4536 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4537 thread_wakeup((event_t) &vm_page_free_wanted);
4538
4539 VM_CHECK_MEMORYSTATUS;
4540
4541 /*
4542 * The CPM pages should now be available and
4543 * ordered by ascending physical address.
4544 */
4545 assert(vm_page_verify_contiguous(pages, npages));
4546
4547 *list = pages;
4548 return KERN_SUCCESS;
4549 }
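
/*
 * Editor's note: hypothetical usage sketch (not from the original source)
 * showing how a caller might obtain a physically contiguous, wired buffer
 * via cpm_allocate() above.  The function name and the choice of arguments
 * (no physical ceiling, no alignment mask) are assumptions for illustration
 * only.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_grab_contiguous_buffer(vm_size_t size, ppnum_t *first_ppnum)
{
	vm_page_t	pages;
	kern_return_t	kr;

	kr = cpm_allocate(size, &pages, 0 /* max_pnum */, 0 /* pnum_mask */,
			  TRUE /* wire */, 0 /* flags */);
	if (kr != KERN_SUCCESS)
		return (kr);

	/* the returned list is ordered by ascending physical address */
	*first_ppnum = pages->phys_page;

	return (KERN_SUCCESS);
}
#endif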
4550
4551
4552 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4553
4554 /*
4555 * when working on a 'run' of pages, it is necessary to hold
4556 * the vm_page_queue_lock (a hot global lock) for certain operations
4557 * on the page... however, the majority of the work can be done
4558 * while merely holding the object lock... in fact there are certain
4559 * collections of pages that don't require any work brokered by the
4560 * vm_page_queue_lock... to mitigate the time spent behind the global
4561 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4562 * while doing all of the work that doesn't require the vm_page_queue_lock...
4563 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4564 * necessary work for each page... we will grab the busy bit on the page
4565 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4566 * if it can't immediately take the vm_page_queue_lock in order to compete
4567 * for the locks in the same order that vm_pageout_scan takes them.
4568 * the operation names are modeled after the names of the routines that
4569 * need to be called in order to make the changes very obvious in the
4570 * original loop
4571 */
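
/*
 * Editor's note: hypothetical sketch (not from the original source) of the
 * 2 pass pattern described above.  A caller batches per-page work into an
 * array of struct vm_page_delayed_work while holding only the object lock,
 * then hands the whole batch to vm_page_do_delayed_work() so the page queue
 * lock is taken once per batch instead of once per page.  The helper name,
 * the batch size and the DW_vm_page_activate choice are assumptions for
 * illustration.
 */
#if 0	/* illustrative sketch only */
static void
example_activate_object_pages(vm_object_t object, vm_page_t *page_array, int count)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp;
	int				dw_count;
	int				i;

	dwp = &dw_array[0];
	dw_count = 0;

	vm_object_lock(object);

	for (i = 0; i < count; i++) {
		/* pass 1: record the work each page needs */
		dwp->dw_m = page_array[i];
		dwp->dw_mask = DW_vm_page_activate;
		dwp++;
		dw_count++;

		if (dw_count >= DEFAULT_DELAYED_WORK_LIMIT) {
			/* pass 2: do the queue-lock work for the batch */
			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);

	vm_object_unlock(object);
}
#endif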
4572
4573 void
4574 vm_page_do_delayed_work(
4575 vm_object_t object,
4576 struct vm_page_delayed_work *dwp,
4577 int dw_count)
4578 {
4579 int j;
4580 vm_page_t m;
4581 vm_page_t local_free_q = VM_PAGE_NULL;
4582
4583 /*
4584 * pageout_scan takes the vm_page_lock_queues first
4585 * then tries for the object lock... to avoid what
4586 * is effectively a lock inversion, we'll go to the
4587 * trouble of taking them in that same order... otherwise
4588 * if this object contains the majority of the pages resident
4589 * in the UBC (or a small set of large objects actively being
4590 * worked on contain the majority of the pages), we could
4591 * cause the pageout_scan thread to 'starve' in its attempt
4592 * to find pages to move to the free queue, since it has to
4593 * successfully acquire the object lock of any candidate page
4594 * before it can steal/clean it.
4595 */
4596 if (!vm_page_trylockspin_queues()) {
4597 vm_object_unlock(object);
4598
4599 vm_page_lockspin_queues();
4600
4601 for (j = 0; ; j++) {
4602 if (!vm_object_lock_avoid(object) &&
4603 _vm_object_lock_try(object))
4604 break;
4605 vm_page_unlock_queues();
4606 mutex_pause(j);
4607 vm_page_lockspin_queues();
4608 }
4609 }
4610 for (j = 0; j < dw_count; j++, dwp++) {
4611
4612 m = dwp->dw_m;
4613
4614 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4615 vm_pageout_throttle_up(m);
4616
4617 if (dwp->dw_mask & DW_vm_page_wire)
4618 vm_page_wire(m);
4619 else if (dwp->dw_mask & DW_vm_page_unwire) {
4620 boolean_t queueit;
4621
4622 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4623
4624 vm_page_unwire(m, queueit);
4625 }
4626 if (dwp->dw_mask & DW_vm_page_free) {
4627 vm_page_free_prepare_queues(m);
4628
4629 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4630 /*
4631 * Add this page to our list of reclaimed pages,
4632 * to be freed later.
4633 */
4634 m->pageq.next = (queue_entry_t) local_free_q;
4635 local_free_q = m;
4636 } else {
4637 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4638 vm_page_deactivate_internal(m, FALSE);
4639 else if (dwp->dw_mask & DW_vm_page_activate) {
4640 if (m->active == FALSE) {
4641 vm_page_activate(m);
4642 }
4643 }
4644 else if (dwp->dw_mask & DW_vm_page_speculate)
4645 vm_page_speculate(m, TRUE);
4646 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4647 /*
4648 * if we didn't hold the object lock and did this,
4649 * we might disconnect the page, then someone might
4650 * soft fault it back in, then we would put it on the
4651 * cleaned queue, and so we would have a referenced (maybe even dirty)
4652 * page on that queue, which we don't want
4653 */
4654 int refmod_state = pmap_disconnect(m->phys_page);
4655
4656 if ((refmod_state & VM_MEM_REFERENCED)) {
4657 /*
4658 * this page has been touched since it got cleaned; let's activate it
4659 * if it hasn't already been
4660 */
4661 vm_pageout_enqueued_cleaned++;
4662 vm_pageout_cleaned_reactivated++;
4663 vm_pageout_cleaned_commit_reactivated++;
4664
4665 if (m->active == FALSE)
4666 vm_page_activate(m);
4667 } else {
4668 m->reference = FALSE;
4669 vm_page_enqueue_cleaned(m);
4670 }
4671 }
4672 else if (dwp->dw_mask & DW_vm_page_lru)
4673 vm_page_lru(m);
4674 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4675 if ( !m->pageout_queue)
4676 VM_PAGE_QUEUES_REMOVE(m);
4677 }
4678 if (dwp->dw_mask & DW_set_reference)
4679 m->reference = TRUE;
4680 else if (dwp->dw_mask & DW_clear_reference)
4681 m->reference = FALSE;
4682
4683 if (dwp->dw_mask & DW_move_page) {
4684 if ( !m->pageout_queue) {
4685 VM_PAGE_QUEUES_REMOVE(m);
4686
4687 assert(m->object != kernel_object);
4688
4689 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4690 }
4691 }
4692 if (dwp->dw_mask & DW_clear_busy)
4693 m->busy = FALSE;
4694
4695 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4696 PAGE_WAKEUP(m);
4697 }
4698 }
4699 vm_page_unlock_queues();
4700
4701 if (local_free_q)
4702 vm_page_free_list(local_free_q, TRUE);
4703
4704 VM_CHECK_MEMORYSTATUS;
4705
4706 }
4707
4708 kern_return_t
4709 vm_page_alloc_list(
4710 int page_count,
4711 int flags,
4712 vm_page_t *list)
4713 {
4714 vm_page_t lo_page_list = VM_PAGE_NULL;
4715 vm_page_t mem;
4716 int i;
4717
4718 if ( !(flags & KMA_LOMEM))
4719 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4720
4721 for (i = 0; i < page_count; i++) {
4722
4723 mem = vm_page_grablo();
4724
4725 if (mem == VM_PAGE_NULL) {
4726 if (lo_page_list)
4727 vm_page_free_list(lo_page_list, FALSE);
4728
4729 *list = VM_PAGE_NULL;
4730
4731 return (KERN_RESOURCE_SHORTAGE);
4732 }
4733 mem->pageq.next = (queue_entry_t) lo_page_list;
4734 lo_page_list = mem;
4735 }
4736 *list = lo_page_list;
4737
4738 return (KERN_SUCCESS);
4739 }
4740
4741 void
4742 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4743 {
4744 page->offset = offset;
4745 }
4746
4747 vm_page_t
4748 vm_page_get_next(vm_page_t page)
4749 {
4750 return ((vm_page_t) page->pageq.next);
4751 }
4752
4753 vm_object_offset_t
4754 vm_page_get_offset(vm_page_t page)
4755 {
4756 return (page->offset);
4757 }
4758
4759 ppnum_t
4760 vm_page_get_phys_page(vm_page_t page)
4761 {
4762 return (page->phys_page);
4763 }
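
/*
 * Editor's note: hypothetical sketch (not from the original source) tying
 * together vm_page_alloc_list() and the accessor routines above: allocate a
 * batch of low-memory pages and collect their physical page numbers.  The
 * function name and the caller-supplied output array are assumptions for
 * illustration.
 */
#if 0	/* illustrative sketch only */
static kern_return_t
example_collect_lo_ppnums(int page_count, ppnum_t *ppnum_array)
{
	vm_page_t	list;
	vm_page_t	m;
	kern_return_t	kr;
	int		i = 0;

	/* vm_page_alloc_list() only supports KMA_LOMEM allocations */
	kr = vm_page_alloc_list(page_count, KMA_LOMEM, &list);

	if (kr != KERN_SUCCESS)
		return (kr);

	for (m = list; m != VM_PAGE_NULL; m = vm_page_get_next(m))
		ppnum_array[i++] = vm_page_get_phys_page(m);

	return (KERN_SUCCESS);
}
#endif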
4764
4765
4766 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4767
4768 #if HIBERNATION
4769
4770 static vm_page_t hibernate_gobble_queue;
4771
4772 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4773
4774 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4775 static int hibernate_flush_dirty_pages(int);
4776 static int hibernate_flush_queue(queue_head_t *, int);
4777
4778 void hibernate_flush_wait(void);
4779 void hibernate_mark_in_progress(void);
4780 void hibernate_clear_in_progress(void);
4781
4782 void hibernate_free_range(int, int);
4783 void hibernate_hash_insert_page(vm_page_t);
4784 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
4785 void hibernate_rebuild_vm_structs(void);
4786 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
4787 ppnum_t hibernate_lookup_paddr(unsigned int);
4788
4789 struct hibernate_statistics {
4790 int hibernate_considered;
4791 int hibernate_reentered_on_q;
4792 int hibernate_found_dirty;
4793 int hibernate_skipped_cleaning;
4794 int hibernate_skipped_transient;
4795 int hibernate_skipped_precious;
4796 int hibernate_skipped_external;
4797 int hibernate_queue_nolock;
4798 int hibernate_queue_paused;
4799 int hibernate_throttled;
4800 int hibernate_throttle_timeout;
4801 int hibernate_drained;
4802 int hibernate_drain_timeout;
4803 int cd_lock_failed;
4804 int cd_found_precious;
4805 int cd_found_wired;
4806 int cd_found_busy;
4807 int cd_found_unusual;
4808 int cd_found_cleaning;
4809 int cd_found_laundry;
4810 int cd_found_dirty;
4811 int cd_found_xpmapped;
4812 int cd_local_free;
4813 int cd_total_free;
4814 int cd_vm_page_wire_count;
4815 int cd_vm_struct_pages_unneeded;
4816 int cd_pages;
4817 int cd_discarded;
4818 int cd_count_wire;
4819 } hibernate_stats;
4820
4821
4822
4823 static int
4824 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4825 {
4826 wait_result_t wait_result;
4827
4828 vm_page_lock_queues();
4829
4830 while ( !queue_empty(&q->pgo_pending) ) {
4831
4832 q->pgo_draining = TRUE;
4833
4834 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4835
4836 vm_page_unlock_queues();
4837
4838 wait_result = thread_block(THREAD_CONTINUE_NULL);
4839
4840 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
4841 hibernate_stats.hibernate_drain_timeout++;
4842
4843 if (q == &vm_pageout_queue_external)
4844 return (0);
4845
4846 return (1);
4847 }
4848 vm_page_lock_queues();
4849
4850 hibernate_stats.hibernate_drained++;
4851 }
4852 vm_page_unlock_queues();
4853
4854 return (0);
4855 }
4856
4857
4858 boolean_t hibernate_skip_external = FALSE;
4859
4860 static int
4861 hibernate_flush_queue(queue_head_t *q, int qcount)
4862 {
4863 vm_page_t m;
4864 vm_object_t l_object = NULL;
4865 vm_object_t m_object = NULL;
4866 int refmod_state = 0;
4867 int try_failed_count = 0;
4868 int retval = 0;
4869 int current_run = 0;
4870 struct vm_pageout_queue *iq;
4871 struct vm_pageout_queue *eq;
4872 struct vm_pageout_queue *tq;
4873
4874
4875 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4876
4877 iq = &vm_pageout_queue_internal;
4878 eq = &vm_pageout_queue_external;
4879
4880 vm_page_lock_queues();
4881
4882 while (qcount && !queue_empty(q)) {
4883
4884 if (current_run++ == 1000) {
4885 if (hibernate_should_abort()) {
4886 retval = 1;
4887 break;
4888 }
4889 current_run = 0;
4890 }
4891
4892 m = (vm_page_t) queue_first(q);
4893 m_object = m->object;
4894
4895 /*
4896 * check to see if we currently are working
4897 * with the same object... if so, we've
4898 * already got the lock
4899 */
4900 if (m_object != l_object) {
4901 /*
4902 * the object associated with candidate page is
4903 * different from the one we were just working
4904 * with... dump the lock if we still own it
4905 */
4906 if (l_object != NULL) {
4907 vm_object_unlock(l_object);
4908 l_object = NULL;
4909 }
4910 /*
4911 * Try to lock object; since we've already got the
4912 * page queues lock, we can only 'try' for this one.
4913 * if the 'try' fails, we need to do a mutex_pause
4914 * to allow the owner of the object lock a chance to
4915 * run...
4916 */
4917 if ( !vm_object_lock_try_scan(m_object)) {
4918
4919 if (try_failed_count > 20) {
4920 hibernate_stats.hibernate_queue_nolock++;
4921
4922 goto reenter_pg_on_q;
4923 }
4924 vm_pageout_scan_wants_object = m_object;
4925
4926 vm_page_unlock_queues();
4927 mutex_pause(try_failed_count++);
4928 vm_page_lock_queues();
4929
4930 hibernate_stats.hibernate_queue_paused++;
4931 continue;
4932 } else {
4933 l_object = m_object;
4934 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4935 }
4936 }
4937 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
4938 /*
4939 * page is not to be cleaned
4940 * put it back on the head of its queue
4941 */
4942 if (m->cleaning)
4943 hibernate_stats.hibernate_skipped_cleaning++;
4944 else
4945 hibernate_stats.hibernate_skipped_transient++;
4946
4947 goto reenter_pg_on_q;
4948 }
4949 if (m_object->copy == VM_OBJECT_NULL) {
4950 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4951 /*
4952 * let the normal hibernate image path
4953 * deal with these
4954 */
4955 goto reenter_pg_on_q;
4956 }
4957 }
4958 if ( !m->dirty && m->pmapped) {
4959 refmod_state = pmap_get_refmod(m->phys_page);
4960
4961 if ((refmod_state & VM_MEM_MODIFIED)) {
4962 SET_PAGE_DIRTY(m, FALSE);
4963 }
4964 } else
4965 refmod_state = 0;
4966
4967 if ( !m->dirty) {
4968 /*
4969 * page is not to be cleaned
4970 * put it back on the head of its queue
4971 */
4972 if (m->precious)
4973 hibernate_stats.hibernate_skipped_precious++;
4974
4975 goto reenter_pg_on_q;
4976 }
4977
4978 if (hibernate_skip_external == TRUE && !m_object->internal) {
4979
4980 hibernate_stats.hibernate_skipped_external++;
4981
4982 goto reenter_pg_on_q;
4983 }
4984 tq = NULL;
4985
4986 if (m_object->internal) {
4987 if (VM_PAGE_Q_THROTTLED(iq))
4988 tq = iq;
4989 } else if (VM_PAGE_Q_THROTTLED(eq))
4990 tq = eq;
4991
4992 if (tq != NULL) {
4993 wait_result_t wait_result;
4994 int wait_count = 5;
4995
4996 if (l_object != NULL) {
4997 vm_object_unlock(l_object);
4998 l_object = NULL;
4999 }
5000 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5001
5002 while (retval == 0) {
5003
5004 tq->pgo_throttled = TRUE;
5005
5006 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
5007
5008 vm_page_unlock_queues();
5009
5010 wait_result = thread_block(THREAD_CONTINUE_NULL);
5011
5012 vm_page_lock_queues();
5013
5014 if (wait_result != THREAD_TIMED_OUT)
5015 break;
5016 if (!VM_PAGE_Q_THROTTLED(tq))
5017 break;
5018
5019 if (hibernate_should_abort())
5020 retval = 1;
5021
5022 if (--wait_count == 0) {
5023
5024 hibernate_stats.hibernate_throttle_timeout++;
5025
5026 if (tq == eq) {
5027 hibernate_skip_external = TRUE;
5028 break;
5029 }
5030 retval = 1;
5031 }
5032 }
5033 if (retval)
5034 break;
5035
5036 hibernate_stats.hibernate_throttled++;
5037
5038 continue;
5039 }
5040 /*
5041 * we've already factored out pages in the laundry which
5042 * means this page can't be on the pageout queue so it's
5043 * safe to do the VM_PAGE_QUEUES_REMOVE
5044 */
5045 assert(!m->pageout_queue);
5046
5047 VM_PAGE_QUEUES_REMOVE(m);
5048
5049 if (COMPRESSED_PAGER_IS_ACTIVE)
5050 pmap_disconnect(m->phys_page);
5051
5052 vm_pageout_cluster(m, FALSE);
5053
5054 hibernate_stats.hibernate_found_dirty++;
5055
5056 goto next_pg;
5057
5058 reenter_pg_on_q:
5059 queue_remove(q, m, vm_page_t, pageq);
5060 queue_enter(q, m, vm_page_t, pageq);
5061
5062 hibernate_stats.hibernate_reentered_on_q++;
5063 next_pg:
5064 hibernate_stats.hibernate_considered++;
5065
5066 qcount--;
5067 try_failed_count = 0;
5068 }
5069 if (l_object != NULL) {
5070 vm_object_unlock(l_object);
5071 l_object = NULL;
5072 }
5073 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5074
5075 vm_page_unlock_queues();
5076
5077 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5078
5079 return (retval);
5080 }
5081
5082
5083 static int
5084 hibernate_flush_dirty_pages(int pass)
5085 {
5086 struct vm_speculative_age_q *aq;
5087 uint32_t i;
5088
5089 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5090
5091 if (vm_page_local_q) {
5092 for (i = 0; i < vm_page_local_q_count; i++)
5093 vm_page_reactivate_local(i, TRUE, FALSE);
5094 }
5095
5096 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5097 int qcount;
5098 vm_page_t m;
5099
5100 aq = &vm_page_queue_speculative[i];
5101
5102 if (queue_empty(&aq->age_q))
5103 continue;
5104 qcount = 0;
5105
5106 vm_page_lockspin_queues();
5107
5108 queue_iterate(&aq->age_q,
5109 m,
5110 vm_page_t,
5111 pageq)
5112 {
5113 qcount++;
5114 }
5115 vm_page_unlock_queues();
5116
5117 if (qcount) {
5118 if (hibernate_flush_queue(&aq->age_q, qcount))
5119 return (1);
5120 }
5121 }
5122 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
5123 return (1);
5124 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5125 return (1);
5126 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
5127 return (1);
5128 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5129 return (1);
5130
5131 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5132 vm_compressor_record_warmup_start();
5133
5134 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5135 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5136 vm_compressor_record_warmup_end();
5137 return (1);
5138 }
5139 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5140 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5141 vm_compressor_record_warmup_end();
5142 return (1);
5143 }
5144 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5145 vm_compressor_record_warmup_end();
5146
5147 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5148 return (1);
5149
5150 return (0);
5151 }
5152
5153
5154 int
5155 hibernate_flush_memory()
5156 {
5157 int retval;
5158
5159 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5160
5161 hibernate_cleaning_in_progress = TRUE;
5162 hibernate_skip_external = FALSE;
5163
5164 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5165
5166 if (COMPRESSED_PAGER_IS_ACTIVE) {
5167
5168 if ((retval = hibernate_flush_dirty_pages(2)) == 0) {
5169
5170 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5171
5172 vm_compressor_flush();
5173
5174 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5175 }
5176 }
5177 if (retval == 0 && consider_buffer_cache_collect != NULL) {
5178 unsigned int orig_wire_count;
5179
5180 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5181 orig_wire_count = vm_page_wire_count;
5182
5183 (void)(*consider_buffer_cache_collect)(1);
5184 consider_zone_gc(TRUE);
5185
5186 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5187
5188 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
5189 }
5190 }
5191 hibernate_cleaning_in_progress = FALSE;
5192
5193 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5194
5195 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5196 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5197
5198
5199 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5200 hibernate_stats.hibernate_considered,
5201 hibernate_stats.hibernate_reentered_on_q,
5202 hibernate_stats.hibernate_found_dirty);
5203 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
5204 hibernate_stats.hibernate_skipped_cleaning,
5205 hibernate_stats.hibernate_skipped_transient,
5206 hibernate_stats.hibernate_skipped_precious,
5207 hibernate_stats.hibernate_skipped_external,
5208 hibernate_stats.hibernate_queue_nolock);
5209 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5210 hibernate_stats.hibernate_queue_paused,
5211 hibernate_stats.hibernate_throttled,
5212 hibernate_stats.hibernate_throttle_timeout,
5213 hibernate_stats.hibernate_drained,
5214 hibernate_stats.hibernate_drain_timeout);
5215
5216 return (retval);
5217 }
5218
5219
5220 static void
5221 hibernate_page_list_zero(hibernate_page_list_t *list)
5222 {
5223 uint32_t bank;
5224 hibernate_bitmap_t * bitmap;
5225
5226 bitmap = &list->bank_bitmap[0];
5227 for (bank = 0; bank < list->bank_count; bank++)
5228 {
5229 uint32_t last_bit;
5230
5231 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5232 // set out-of-bound bits at end of bitmap.
5233 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5234 if (last_bit)
5235 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
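		/*
		 * Editor's note (worked example; bit ordering inferred from the
		 * mask above): a bank spanning 35 pages has last_bit = 35 & 31 = 3,
		 * so the last word becomes 0xFFFFFFFF >> 3 = 0x1FFFFFFF.  The top
		 * 3 bits, which represent real pages, stay zero ("needs saving" by
		 * default), while the 29 low, out-of-range bits are set so they are
		 * never treated as pages to save.
		 */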
5236
5237 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5238 }
5239 }
5240
5241 void
5242 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
5243 {
5244 uint32_t i;
5245 vm_page_t m;
5246 uint64_t start, end, timeout, nsec;
5247 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
5248 clock_get_uptime(&start);
5249
5250 for (i = 0; i < gobble_count; i++)
5251 {
5252 while (VM_PAGE_NULL == (m = vm_page_grab()))
5253 {
5254 clock_get_uptime(&end);
5255 if (end >= timeout)
5256 break;
5257 VM_PAGE_WAIT();
5258 }
5259 if (!m)
5260 break;
5261 m->busy = FALSE;
5262 vm_page_gobble(m);
5263
5264 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
5265 hibernate_gobble_queue = m;
5266 }
5267
5268 clock_get_uptime(&end);
5269 absolutetime_to_nanoseconds(end - start, &nsec);
5270 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
5271 }
5272
5273 void
5274 hibernate_free_gobble_pages(void)
5275 {
5276 vm_page_t m, next;
5277 uint32_t count = 0;
5278
5279 m = (vm_page_t) hibernate_gobble_queue;
5280 while(m)
5281 {
5282 next = (vm_page_t) m->pageq.next;
5283 vm_page_free(m);
5284 count++;
5285 m = next;
5286 }
5287 hibernate_gobble_queue = VM_PAGE_NULL;
5288
5289 if (count)
5290 HIBLOG("Freed %d pages\n", count);
5291 }
5292
5293 static boolean_t
5294 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
5295 {
5296 vm_object_t object = NULL;
5297 int refmod_state;
5298 boolean_t discard = FALSE;
5299
5300 do
5301 {
5302 if (m->private)
5303 panic("hibernate_consider_discard: private");
5304
5305 if (!vm_object_lock_try(m->object)) {
5306 if (!preflight) hibernate_stats.cd_lock_failed++;
5307 break;
5308 }
5309 object = m->object;
5310
5311 if (VM_PAGE_WIRED(m)) {
5312 if (!preflight) hibernate_stats.cd_found_wired++;
5313 break;
5314 }
5315 if (m->precious) {
5316 if (!preflight) hibernate_stats.cd_found_precious++;
5317 break;
5318 }
5319 if (m->busy || !object->alive) {
5320 /*
5321 * Somebody is playing with this page.
5322 */
5323 if (!preflight) hibernate_stats.cd_found_busy++;
5324 break;
5325 }
5326 if (m->absent || m->unusual || m->error) {
5327 /*
5328 * If it's unusual in any way, ignore it
5329 */
5330 if (!preflight) hibernate_stats.cd_found_unusual++;
5331 break;
5332 }
5333 if (m->cleaning) {
5334 if (!preflight) hibernate_stats.cd_found_cleaning++;
5335 break;
5336 }
5337 if (m->laundry) {
5338 if (!preflight) hibernate_stats.cd_found_laundry++;
5339 break;
5340 }
5341 if (!m->dirty)
5342 {
5343 refmod_state = pmap_get_refmod(m->phys_page);
5344
5345 if (refmod_state & VM_MEM_REFERENCED)
5346 m->reference = TRUE;
5347 if (refmod_state & VM_MEM_MODIFIED) {
5348 SET_PAGE_DIRTY(m, FALSE);
5349 }
5350 }
5351
5352 /*
5353 * If it's clean or purgeable we can discard the page on wakeup.
5354 */
5355 discard = (!m->dirty)
5356 || (VM_PURGABLE_VOLATILE == object->purgable)
5357 || (VM_PURGABLE_EMPTY == object->purgable);
5358
5359
5360 if (discard == FALSE) {
5361 if (!preflight)
5362 hibernate_stats.cd_found_dirty++;
5363 } else if (m->xpmapped && m->reference) {
5364 if (!preflight)
5365 hibernate_stats.cd_found_xpmapped++;
5366 discard = FALSE;
5367 }
5368 }
5369 while (FALSE);
5370
5371 if (object)
5372 vm_object_unlock(object);
5373
5374 return (discard);
5375 }
5376
5377
5378 static void
5379 hibernate_discard_page(vm_page_t m)
5380 {
5381 if (m->absent || m->unusual || m->error)
5382 /*
5383 * If it's unusual in any way, ignore it
5384 */
5385 return;
5386
5387 #if DEBUG
5388 vm_object_t object = m->object;
5389 if (!vm_object_lock_try(m->object))
5390 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5391 #else
5392 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5393 makes sure these locks are uncontended before sleep */
5394 #endif /* !DEBUG */
5395
5396 if (m->pmapped == TRUE)
5397 {
5398 __unused int refmod_state = pmap_disconnect(m->phys_page);
5399 }
5400
5401 if (m->laundry)
5402 panic("hibernate_discard_page(%p) laundry", m);
5403 if (m->private)
5404 panic("hibernate_discard_page(%p) private", m);
5405 if (m->fictitious)
5406 panic("hibernate_discard_page(%p) fictitious", m);
5407
5408 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5409 {
5410 /* object should be on a queue */
5411 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5412 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5413 assert(old_queue);
5414 if (m->object->purgeable_when_ripe) {
5415 vm_purgeable_token_delete_first(old_queue);
5416 }
5417 m->object->purgable = VM_PURGABLE_EMPTY;
5418 }
5419
5420 vm_page_free(m);
5421
5422 #if DEBUG
5423 vm_object_unlock(object);
5424 #endif /* DEBUG */
5425 }
5426
5427 /*
5428 Grab locks for hibernate_page_list_setall()
5429 */
5430 void
5431 hibernate_vm_lock_queues(void)
5432 {
5433 vm_object_lock(compressor_object);
5434 vm_page_lock_queues();
5435 lck_mtx_lock(&vm_page_queue_free_lock);
5436
5437 if (vm_page_local_q) {
5438 uint32_t i;
5439 for (i = 0; i < vm_page_local_q_count; i++) {
5440 struct vpl *lq;
5441 lq = &vm_page_local_q[i].vpl_un.vpl;
5442 VPL_LOCK(&lq->vpl_lock);
5443 }
5444 }
5445 }
5446
5447 void
5448 hibernate_vm_unlock_queues(void)
5449 {
5450 if (vm_page_local_q) {
5451 uint32_t i;
5452 for (i = 0; i < vm_page_local_q_count; i++) {
5453 struct vpl *lq;
5454 lq = &vm_page_local_q[i].vpl_un.vpl;
5455 VPL_UNLOCK(&lq->vpl_lock);
5456 }
5457 }
5458 lck_mtx_unlock(&vm_page_queue_free_lock);
5459 vm_page_unlock_queues();
5460 vm_object_unlock(compressor_object);
5461 }
5462
5463 /*
5464 Bits zero in the bitmaps => page needs to be saved. All pages default to being saved;
5465 pages known to VM not to need saving are subtracted.
5466 Wired pages to be saved are present in page_list_wired, pageable ones in page_list.
5467 */
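
/*
 * Editor's note: minimal sketch (hypothetical helper, not from the original
 * source) of the marking convention used by hibernate_page_list_setall()
 * below: setting a page's bit in page_list means it does not need to be
 * preserved in the image at all, and setting its bit in page_list_wired
 * removes it from the set of pages that must be saved as wired.
 */
#if 0	/* illustrative sketch only */
static void
example_subtract_page(hibernate_page_list_t *page_list,
		      hibernate_page_list_t *page_list_wired,
		      vm_page_t m, boolean_t discardable)
{
	if (discardable) {
		/* clean/discardable: no need to save its contents at all */
		hibernate_page_bitset(page_list, TRUE, m->phys_page);
	}
	/* this page was found on a pageable queue, so it is not wired */
	hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
}
#endif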
5468
5469 void
5470 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5471 hibernate_page_list_t * page_list_wired,
5472 hibernate_page_list_t * page_list_pal,
5473 boolean_t preflight,
5474 boolean_t will_discard,
5475 uint32_t * pagesOut)
5476 {
5477 uint64_t start, end, nsec;
5478 vm_page_t m;
5479 vm_page_t next;
5480 uint32_t pages = page_list->page_count;
5481 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
5482 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5483 uint32_t count_wire = pages;
5484 uint32_t count_discard_active = 0;
5485 uint32_t count_discard_inactive = 0;
5486 uint32_t count_discard_cleaned = 0;
5487 uint32_t count_discard_purgeable = 0;
5488 uint32_t count_discard_speculative = 0;
5489 uint32_t count_discard_vm_struct_pages = 0;
5490 uint32_t i;
5491 uint32_t bank;
5492 hibernate_bitmap_t * bitmap;
5493 hibernate_bitmap_t * bitmap_wired;
5494 boolean_t discard_all;
5495 boolean_t discard;
5496
5497 HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
5498
5499 if (preflight) {
5500 page_list = NULL;
5501 page_list_wired = NULL;
5502 page_list_pal = NULL;
5503 discard_all = FALSE;
5504 } else {
5505 discard_all = will_discard;
5506 }
5507
5508 #if DEBUG
5509 if (!preflight)
5510 {
5511 vm_page_lock_queues();
5512 if (vm_page_local_q) {
5513 for (i = 0; i < vm_page_local_q_count; i++) {
5514 struct vpl *lq;
5515 lq = &vm_page_local_q[i].vpl_un.vpl;
5516 VPL_LOCK(&lq->vpl_lock);
5517 }
5518 }
5519 }
5520 #endif /* DEBUG */
5521
5522
5523 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5524
5525 clock_get_uptime(&start);
5526
5527 if (!preflight) {
5528 hibernate_page_list_zero(page_list);
5529 hibernate_page_list_zero(page_list_wired);
5530 hibernate_page_list_zero(page_list_pal);
5531
5532 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5533 hibernate_stats.cd_pages = pages;
5534 }
5535
5536 if (vm_page_local_q) {
5537 for (i = 0; i < vm_page_local_q_count; i++)
5538 vm_page_reactivate_local(i, TRUE, !preflight);
5539 }
5540
5541 if (preflight) {
5542 vm_object_lock(compressor_object);
5543 vm_page_lock_queues();
5544 lck_mtx_lock(&vm_page_queue_free_lock);
5545 }
5546
5547 m = (vm_page_t) hibernate_gobble_queue;
5548 while (m)
5549 {
5550 pages--;
5551 count_wire--;
5552 if (!preflight) {
5553 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5554 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5555 }
5556 m = (vm_page_t) m->pageq.next;
5557 }
5558
5559 if (!preflight) for( i = 0; i < real_ncpus; i++ )
5560 {
5561 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5562 {
5563 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5564 {
5565 pages--;
5566 count_wire--;
5567 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5568 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5569
5570 hibernate_stats.cd_local_free++;
5571 hibernate_stats.cd_total_free++;
5572 }
5573 }
5574 }
5575
5576 for( i = 0; i < vm_colors; i++ )
5577 {
5578 queue_iterate(&vm_page_queue_free[i],
5579 m,
5580 vm_page_t,
5581 pageq)
5582 {
5583 pages--;
5584 count_wire--;
5585 if (!preflight) {
5586 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5587 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5588
5589 hibernate_stats.cd_total_free++;
5590 }
5591 }
5592 }
5593
5594 queue_iterate(&vm_lopage_queue_free,
5595 m,
5596 vm_page_t,
5597 pageq)
5598 {
5599 pages--;
5600 count_wire--;
5601 if (!preflight) {
5602 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5603 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5604
5605 hibernate_stats.cd_total_free++;
5606 }
5607 }
5608
5609 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5610 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
5611 {
5612 next = (vm_page_t) m->pageq.next;
5613 discard = FALSE;
5614 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5615 && hibernate_consider_discard(m, preflight))
5616 {
5617 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5618 count_discard_inactive++;
5619 discard = discard_all;
5620 }
5621 else
5622 count_throttled++;
5623 count_wire--;
5624 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5625
5626 if (discard) hibernate_discard_page(m);
5627 m = next;
5628 }
5629
5630 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5631 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5632 {
5633 next = (vm_page_t) m->pageq.next;
5634 discard = FALSE;
5635 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5636 && hibernate_consider_discard(m, preflight))
5637 {
5638 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5639 if (m->dirty)
5640 count_discard_purgeable++;
5641 else
5642 count_discard_inactive++;
5643 discard = discard_all;
5644 }
5645 else
5646 count_anonymous++;
5647 count_wire--;
5648 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5649 if (discard) hibernate_discard_page(m);
5650 m = next;
5651 }
5652
5653 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5654 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5655 {
5656 next = (vm_page_t) m->pageq.next;
5657 discard = FALSE;
5658 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5659 && hibernate_consider_discard(m, preflight))
5660 {
5661 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5662 if (m->dirty)
5663 count_discard_purgeable++;
5664 else
5665 count_discard_inactive++;
5666 discard = discard_all;
5667 }
5668 else
5669 count_inactive++;
5670 count_wire--;
5671 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5672 if (discard) hibernate_discard_page(m);
5673 m = next;
5674 }
5675
5676 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5677 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5678 {
5679 next = (vm_page_t) m->pageq.next;
5680 discard = FALSE;
5681 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5682 && hibernate_consider_discard(m, preflight))
5683 {
5684 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5685 if (m->dirty)
5686 count_discard_purgeable++;
5687 else
5688 count_discard_cleaned++;
5689 discard = discard_all;
5690 }
5691 else
5692 count_cleaned++;
5693 count_wire--;
5694 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5695 if (discard) hibernate_discard_page(m);
5696 m = next;
5697 }
5698
5699 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5700 {
5701 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5702 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5703 {
5704 next = (vm_page_t) m->pageq.next;
5705 discard = FALSE;
5706 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5707 && hibernate_consider_discard(m, preflight))
5708 {
5709 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5710 count_discard_speculative++;
5711 discard = discard_all;
5712 }
5713 else
5714 count_speculative++;
5715 count_wire--;
5716 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5717 if (discard) hibernate_discard_page(m);
5718 m = next;
5719 }
5720 }
5721
5722 m = (vm_page_t) queue_first(&vm_page_queue_active);
5723 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5724 {
5725 next = (vm_page_t) m->pageq.next;
5726 discard = FALSE;
5727 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5728 && hibernate_consider_discard(m, preflight))
5729 {
5730 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5731 if (m->dirty)
5732 count_discard_purgeable++;
5733 else
5734 count_discard_active++;
5735 discard = discard_all;
5736 }
5737 else
5738 count_active++;
5739 count_wire--;
5740 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5741 if (discard) hibernate_discard_page(m);
5742 m = next;
5743 }
5744
5745 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
5746 {
5747 count_compressor++;
5748 count_wire--;
5749 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5750 }
5751
5752 if (preflight == FALSE && discard_all == TRUE) {
5753 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5754
5755 HIBLOG("hibernate_teardown started\n");
5756 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
5757 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
5758
5759 pages -= count_discard_vm_struct_pages;
5760 count_wire -= count_discard_vm_struct_pages;
5761
5762 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
5763
5764 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
5765 }
5766
5767 if (!preflight) {
5768 // pull wired from hibernate_bitmap
5769 bitmap = &page_list->bank_bitmap[0];
5770 bitmap_wired = &page_list_wired->bank_bitmap[0];
5771 for (bank = 0; bank < page_list->bank_count; bank++)
5772 {
5773 for (i = 0; i < bitmap->bitmapwords; i++)
5774 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5775 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5776 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5777 }
5778 }
5779
5780 // machine dependent adjustments
5781 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
5782
5783 if (!preflight) {
5784 hibernate_stats.cd_count_wire = count_wire;
5785 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
5786 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
5787 }
5788
5789 clock_get_uptime(&end);
5790 absolutetime_to_nanoseconds(end - start, &nsec);
5791 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5792
5793 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
5794 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
5795 discard_all ? "did" : "could",
5796 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5797
5798 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
5799
5800 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
5801
5802 #if DEBUG
5803 if (!preflight)
5804 {
5805 if (vm_page_local_q) {
5806 for (i = 0; i < vm_page_local_q_count; i++) {
5807 struct vpl *lq;
5808 lq = &vm_page_local_q[i].vpl_un.vpl;
5809 VPL_UNLOCK(&lq->vpl_lock);
5810 }
5811 }
5812 vm_page_unlock_queues();
5813 }
5814 #endif /* DEBUG */
5815
5816 if (preflight) {
5817 lck_mtx_unlock(&vm_page_queue_free_lock);
5818 vm_page_unlock_queues();
5819 vm_object_unlock(compressor_object);
5820 }
5821
5822 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5823 }
5824
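/*
 * Routine:	hibernate_page_list_discard
 * Purpose:
 *	Walk the anonymous, speculative, inactive, active and
 *	cleaned page queues and discard every page whose bit is
 *	set in "page_list", counting the discards per queue
 *	(dirty pages are counted as purgeable).
 * Conditions:
 *	The page queues must be stable for the duration of the
 *	walk; under DEBUG they are locked here.
 */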
5825 void
5826 hibernate_page_list_discard(hibernate_page_list_t * page_list)
5827 {
5828 uint64_t start, end, nsec;
5829 vm_page_t m;
5830 vm_page_t next;
5831 uint32_t i;
5832 uint32_t count_discard_active = 0;
5833 uint32_t count_discard_inactive = 0;
5834 uint32_t count_discard_purgeable = 0;
5835 uint32_t count_discard_cleaned = 0;
5836 uint32_t count_discard_speculative = 0;
5837
5838
5839 #if DEBUG
5840 vm_page_lock_queues();
5841 if (vm_page_local_q) {
5842 for (i = 0; i < vm_page_local_q_count; i++) {
5843 struct vpl *lq;
5844 lq = &vm_page_local_q[i].vpl_un.vpl;
5845 VPL_LOCK(&lq->vpl_lock);
5846 }
5847 }
5848 #endif /* DEBUG */
5849
5850 clock_get_uptime(&start);
5851
5852 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5853 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5854 {
5855 next = (vm_page_t) m->pageq.next;
5856 if (hibernate_page_bittst(page_list, m->phys_page))
5857 {
5858 if (m->dirty)
5859 count_discard_purgeable++;
5860 else
5861 count_discard_inactive++;
5862 hibernate_discard_page(m);
5863 }
5864 m = next;
5865 }
5866
5867 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5868 {
5869 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5870 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5871 {
5872 next = (vm_page_t) m->pageq.next;
5873 if (hibernate_page_bittst(page_list, m->phys_page))
5874 {
5875 count_discard_speculative++;
5876 hibernate_discard_page(m);
5877 }
5878 m = next;
5879 }
5880 }
5881
5882 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5883 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5884 {
5885 next = (vm_page_t) m->pageq.next;
5886 if (hibernate_page_bittst(page_list, m->phys_page))
5887 {
5888 if (m->dirty)
5889 count_discard_purgeable++;
5890 else
5891 count_discard_inactive++;
5892 hibernate_discard_page(m);
5893 }
5894 m = next;
5895 }
5896
5897 m = (vm_page_t) queue_first(&vm_page_queue_active);
5898 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5899 {
5900 next = (vm_page_t) m->pageq.next;
5901 if (hibernate_page_bittst(page_list, m->phys_page))
5902 {
5903 if (m->dirty)
5904 count_discard_purgeable++;
5905 else
5906 count_discard_active++;
5907 hibernate_discard_page(m);
5908 }
5909 m = next;
5910 }
5911
5912 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5913 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5914 {
5915 next = (vm_page_t) m->pageq.next;
5916 if (hibernate_page_bittst(page_list, m->phys_page))
5917 {
5918 if (m->dirty)
5919 count_discard_purgeable++;
5920 else
5921 count_discard_cleaned++;
5922 hibernate_discard_page(m);
5923 }
5924 m = next;
5925 }
5926
5927 #if DEBUG
5928 if (vm_page_local_q) {
5929 for (i = 0; i < vm_page_local_q_count; i++) {
5930 struct vpl *lq;
5931 lq = &vm_page_local_q[i].vpl_un.vpl;
5932 VPL_UNLOCK(&lq->vpl_lock);
5933 }
5934 }
5935 vm_page_unlock_queues();
5936 #endif /* DEBUG */
5937
5938 clock_get_uptime(&end);
5939 absolutetime_to_nanoseconds(end - start, &nsec);
5940 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
5941 nsec / 1000000ULL,
5942 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5943 }
5944
5945 boolean_t hibernate_paddr_map_inited = FALSE;
5946 boolean_t hibernate_rebuild_needed = FALSE;
5947 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
5948 vm_page_t hibernate_rebuild_hash_list = NULL;
5949
5950 unsigned int hibernate_teardown_found_tabled_pages = 0;
5951 unsigned int hibernate_teardown_found_created_pages = 0;
5952 unsigned int hibernate_teardown_found_free_pages = 0;
5953 unsigned int hibernate_teardown_vm_page_free_count;
5954
5955
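/*
 * A ppnum_mapping describes one physically contiguous run of
 * pages within the vm_pages array: entries ppnm_sindx up to
 * (but not including) ppnm_eindx start at physical page
 * ppnm_base_paddr.  For example (illustrative values only): if
 * vm_pages[10..14] cover physical pages 0x2000..0x2004, the run
 * is recorded as
 *	{ ppnm_base_paddr = 0x2000, ppnm_sindx = 10, ppnm_eindx = 15 }
 * and hibernate_lookup_paddr(12) returns 0x2000 + (12 - 10).
 */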
5956 struct ppnum_mapping {
5957 struct ppnum_mapping *ppnm_next;
5958 ppnum_t ppnm_base_paddr;
5959 unsigned int ppnm_sindx;
5960 unsigned int ppnm_eindx;
5961 };
5962
5963 struct ppnum_mapping *ppnm_head;
5964 struct ppnum_mapping *ppnm_last_found = NULL;
5965
5966
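/*
 * Routine:	hibernate_create_paddr_map
 * Purpose:
 *	Build, once, the singly linked list of ppnum_mapping
 *	records headed by ppnm_head, one per physically
 *	contiguous run in the vm_pages array.  The list lets
 *	hibernate_lookup_paddr() recover a page's physical page
 *	number from its vm_pages index even when the vm_page_t
 *	itself can no longer be consulted (for instance in
 *	hibernate_free_range()).
 */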
5967 void
5968 hibernate_create_paddr_map()
5969 {
5970 unsigned int i;
5971 ppnum_t next_ppnum_in_run = 0;
5972 struct ppnum_mapping *ppnm = NULL;
5973
5974 if (hibernate_paddr_map_inited == FALSE) {
5975
5976 for (i = 0; i < vm_pages_count; i++) {
5977
5978 if (ppnm)
5979 ppnm->ppnm_eindx = i;
5980
5981 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
5982
5983 ppnm = kalloc(sizeof(struct ppnum_mapping));
5984
5985 ppnm->ppnm_next = ppnm_head;
5986 ppnm_head = ppnm;
5987
5988 ppnm->ppnm_sindx = i;
5989 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
5990 }
5991 next_ppnum_in_run = vm_pages[i].phys_page + 1;
5992 }
5993 ppnm->ppnm_eindx++;
5994
5995 hibernate_paddr_map_inited = TRUE;
5996 }
5997 }
5998
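/*
 * Routine:	hibernate_lookup_paddr
 * Purpose:
 *	Translate an index into the vm_pages array to the
 *	physical page number it describes, using the run list
 *	built by hibernate_create_paddr_map().  The most
 *	recently matched run is cached in ppnm_last_found.
 *	Panics if no run covers the index.
 */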
5999 ppnum_t
6000 hibernate_lookup_paddr(unsigned int indx)
6001 {
6002 struct ppnum_mapping *ppnm = NULL;
6003
6004 ppnm = ppnm_last_found;
6005
6006 if (ppnm) {
6007 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
6008 goto done;
6009 }
6010 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
6011
6012 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
6013 ppnm_last_found = ppnm;
6014 break;
6015 }
6016 }
6017 if (ppnm == NULL)
6018 panic("hibernate_lookup_paddr of %d failed\n", indx);
6019 done:
6020 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
6021 }
6022
6023
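/*
 * Routine:	hibernate_mark_as_unneeded
 * Purpose:
 *	For every page fully contained in the kernel virtual
 *	range [saddr, eaddr), look up its physical page and set
 *	the corresponding bit in both "page_list" and
 *	"page_list_wired", marking the page as not needing to be
 *	preserved in the hibernation image.  Returns the number
 *	of pages marked.
 */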
6024 uint32_t
6025 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6026 {
6027 addr64_t saddr_aligned;
6028 addr64_t eaddr_aligned;
6029 addr64_t addr;
6030 ppnum_t paddr;
6031 unsigned int mark_as_unneeded_pages = 0;
6032
6033 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
6034 eaddr_aligned = eaddr & ~PAGE_MASK_64;
6035
6036 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
6037
6038 paddr = pmap_find_phys(kernel_pmap, addr);
6039
6040 assert(paddr);
6041
6042 hibernate_page_bitset(page_list, TRUE, paddr);
6043 hibernate_page_bitset(page_list_wired, TRUE, paddr);
6044
6045 mark_as_unneeded_pages++;
6046 }
6047 return (mark_as_unneeded_pages);
6048 }
6049
6050
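/*
 * Routine:	hibernate_hash_insert_page
 * Purpose:
 *	Re-insert a hashed page at the head of its object/offset
 *	hash bucket; used by hibernate_rebuild_vm_structs() while
 *	the page hash is being rebuilt after hibernation.
 */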
6051 void
6052 hibernate_hash_insert_page(vm_page_t mem)
6053 {
6054 vm_page_bucket_t *bucket;
6055 int hash_id;
6056
6057 assert(mem->hashed);
6058 assert(mem->object);
6059 assert(mem->offset != (vm_object_offset_t) -1);
6060
6061 /*
6062 * Insert it into the object/offset hash table
6063 */
6064 hash_id = vm_page_hash(mem->object, mem->offset);
6065 bucket = &vm_page_buckets[hash_id];
6066
6067 mem->next = bucket->pages;
6068 bucket->pages = mem;
6069 }
6070
6071
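/*
 * Routine:	hibernate_free_range
 * Purpose:
 *	Reinitialize vm_pages[sindx..eindx) as free pages: each
 *	entry is re-initialized with its physical page number,
 *	marked free, and placed at the head of the free queue for
 *	its color, bumping vm_page_free_count.
 */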
6072 void
6073 hibernate_free_range(int sindx, int eindx)
6074 {
6075 vm_page_t mem;
6076 unsigned int color;
6077
6078 while (sindx < eindx) {
6079 mem = &vm_pages[sindx];
6080
6081 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6082
6083 mem->lopage = FALSE;
6084 mem->free = TRUE;
6085
6086 color = mem->phys_page & vm_color_mask;
6087 queue_enter_first(&vm_page_queue_free[color],
6088 mem,
6089 vm_page_t,
6090 pageq);
6091 vm_page_free_count++;
6092
6093 sindx++;
6094 }
6095 }
6096
6097
6098 extern void hibernate_rebuild_pmap_structs(void);
6099
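/*
 * Routine:	hibernate_rebuild_vm_structs
 * Purpose:
 *	Undo hibernate_teardown_vm_structs() after wake: rebuild
 *	the pmap structures, clear the page hash buckets, move
 *	each compacted vm_page_t back to its original slot (saved
 *	in its "next" field), re-hash the pages that were hashed,
 *	recreate the intervening free ranges, and finally re-hash
 *	the fictitious pages saved on hibernate_rebuild_hash_list.
 */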
6100 void
6101 hibernate_rebuild_vm_structs(void)
6102 {
6103 int cindx, sindx, eindx;
6104 vm_page_t mem, tmem, mem_next;
6105 AbsoluteTime startTime, endTime;
6106 uint64_t nsec;
6107
6108 if (hibernate_rebuild_needed == FALSE)
6109 return;
6110
6111 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6112 HIBLOG("hibernate_rebuild started\n");
6113
6114 clock_get_uptime(&startTime);
6115
6116 hibernate_rebuild_pmap_structs();
6117
6118 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6119 eindx = vm_pages_count;
6120
6121 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6122
6123 mem = &vm_pages[cindx];
6124 /*
6125 * hibernate_teardown_vm_structs leaves the location where
6126 * this vm_page_t must be located in "next".
6127 */
6128 tmem = mem->next;
6129 mem->next = NULL;
6130
6131 sindx = (int)(tmem - &vm_pages[0]);
6132
6133 if (mem != tmem) {
6134 /*
6135 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6136 * so move it back to its real location
6137 */
6138 *tmem = *mem;
6139 mem = tmem;
6140 }
6141 if (mem->hashed)
6142 hibernate_hash_insert_page(mem);
6143 /*
6144 * the 'hole' between this vm_page_t and the previous
6145 * vm_page_t we moved needs to be initialized as
6146 * a range of free vm_page_t's
6147 */
6148 hibernate_free_range(sindx + 1, eindx);
6149
6150 eindx = sindx;
6151 }
6152 if (sindx)
6153 hibernate_free_range(0, sindx);
6154
6155 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6156
6157 /*
6158 * process the list of vm_page_t's that were entered in the hash,
6159 * but were not located in the vm_pages array... these are
6160 * vm_page_t's that were created on the fly (i.e. fictitious)
6161 */
6162 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6163 mem_next = mem->next;
6164
6165 mem->next = NULL;
6166 hibernate_hash_insert_page(mem);
6167 }
6168 hibernate_rebuild_hash_list = NULL;
6169
6170 clock_get_uptime(&endTime);
6171 SUB_ABSOLUTETIME(&endTime, &startTime);
6172 absolutetime_to_nanoseconds(endTime, &nsec);
6173
6174 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6175
6176 hibernate_rebuild_needed = FALSE;
6177
6178 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6179 }
6180
6181
6182 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6183
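/*
 * Routine:	hibernate_teardown_vm_structs
 * Purpose:
 *	Shrink the VM bookkeeping that need not be written to the
 *	hibernation image: pages that are hashed but not backed by
 *	the vm_pages array are moved to hibernate_rebuild_hash_list,
 *	vm_pages is compacted by sliding in-use entries down over
 *	free ones (each entry's original location is remembered in
 *	its "next" field), and the hash buckets, the unused tail of
 *	vm_pages and the teardown-able pmap structures are marked
 *	as unneeded in both bitmaps.  Returns the number of pages
 *	marked unneeded; hibernate_rebuild_vm_structs() reverses
 *	this on wake.
 */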
6184 uint32_t
6185 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6186 {
6187 unsigned int i;
6188 unsigned int compact_target_indx;
6189 vm_page_t mem, mem_next;
6190 vm_page_bucket_t *bucket;
6191 unsigned int mark_as_unneeded_pages = 0;
6192 unsigned int unneeded_vm_page_bucket_pages = 0;
6193 unsigned int unneeded_vm_pages_pages = 0;
6194 unsigned int unneeded_pmap_pages = 0;
6195 addr64_t start_of_unneeded = 0;
6196 addr64_t end_of_unneeded = 0;
6197
6198
6199 if (hibernate_should_abort())
6200 return (0);
6201
6202 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6203 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6204 vm_page_cleaned_count, compressor_object->resident_page_count);
6205
6206 for (i = 0; i < vm_page_bucket_count; i++) {
6207
6208 bucket = &vm_page_buckets[i];
6209
6210 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem_next) {
6211 assert(mem->hashed);
6212
6213 mem_next = mem->next;
6214
6215 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6216 mem->next = hibernate_rebuild_hash_list;
6217 hibernate_rebuild_hash_list = mem;
6218 }
6219 }
6220 }
6221 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6222 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6223
6224 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6225
6226 compact_target_indx = 0;
6227
6228 for (i = 0; i < vm_pages_count; i++) {
6229
6230 mem = &vm_pages[i];
6231
6232 if (mem->free) {
6233 unsigned int color;
6234
6235 assert(mem->busy);
6236 assert(!mem->lopage);
6237
6238 color = mem->phys_page & vm_color_mask;
6239
6240 queue_remove(&vm_page_queue_free[color],
6241 mem,
6242 vm_page_t,
6243 pageq);
6244 mem->pageq.next = NULL;
6245 mem->pageq.prev = NULL;
6246
6247 vm_page_free_count--;
6248
6249 hibernate_teardown_found_free_pages++;
6250
6251 if ( !vm_pages[compact_target_indx].free)
6252 compact_target_indx = i;
6253 } else {
6254 /*
6255 * record this vm_page_t's original location;
6256 * we need this even if it doesn't get moved,
6257 * as an indicator to the rebuild function that
6258 * it doesn't have to be moved back
6259 */
6260 mem->next = mem;
6261
6262 if (vm_pages[compact_target_indx].free) {
6263 /*
6264 * we've got a hole to fill, so
6265 * move this vm_page_t to its new home
6266 */
6267 vm_pages[compact_target_indx] = *mem;
6268 mem->free = TRUE;
6269
6270 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6271 compact_target_indx++;
6272 } else
6273 hibernate_teardown_last_valid_compact_indx = i;
6274 }
6275 }
6276 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6277 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6278 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6279
6280 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6281
6282 if (start_of_unneeded) {
6283 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6284 mark_as_unneeded_pages += unneeded_pmap_pages;
6285 }
6286 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6287
6288 hibernate_rebuild_needed = TRUE;
6289
6290 return (mark_as_unneeded_pages);
6291 }
6292
6293
6294 #endif /* HIBERNATION */
6295
6296 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6297
6298 #include <mach_vm_debug.h>
6299 #if MACH_VM_DEBUG
6300
6301 #include <mach_debug/hash_info.h>
6302 #include <vm/vm_debug.h>
6303
6304 /*
6305 * Routine: vm_page_info
6306 * Purpose:
6307 * Return information about the global VP table.
6308 * Fills the buffer with as much information as possible
6309 * and returns the desired size of the buffer.
6310 * Conditions:
6311 * Nothing locked. The caller should provide
6312 * possibly-pageable memory.
6313 */
6314
6315 unsigned int
6316 vm_page_info(
6317 hash_info_bucket_t *info,
6318 unsigned int count)
6319 {
6320 unsigned int i;
6321 lck_spin_t *bucket_lock;
6322
6323 if (vm_page_bucket_count < count)
6324 count = vm_page_bucket_count;
6325
6326 for (i = 0; i < count; i++) {
6327 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6328 unsigned int bucket_count = 0;
6329 vm_page_t m;
6330
6331 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6332 lck_spin_lock(bucket_lock);
6333
6334 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
6335 bucket_count++;
6336
6337 lck_spin_unlock(bucket_lock);
6338
6339 /* don't touch pageable memory while holding locks */
6340 info[i].hib_count = bucket_count;
6341 }
6342
6343 return vm_page_bucket_count;
6344 }
6345 #endif /* MACH_VM_DEBUG */
6346
6347 #if VM_PAGE_BUCKETS_CHECK
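/*
 * Routine:	vm_page_buckets_check
 * Purpose:
 *	Sanity check the object/offset page hash: every page on a
 *	bucket's chain must be marked "hashed" and must hash to
 *	that bucket.  Also verifies the 0x5a fill pattern of the
 *	fake buckets and panics if a hibernation teardown/rebuild
 *	is still in flight.
 */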
6348 void
6349 vm_page_buckets_check(void)
6350 {
6351 unsigned int i;
6352 vm_page_t p;
6353 unsigned int p_hash;
6354 vm_page_bucket_t *bucket;
6355 lck_spin_t *bucket_lock;
6356
6357 if (!vm_page_buckets_check_ready) {
6358 return;
6359 }
6360
6361 #if HIBERNATION
6362 if (hibernate_rebuild_needed ||
6363 hibernate_rebuild_hash_list) {
6364 panic("BUCKET_CHECK: hibernation in progress: "
6365 "rebuild_needed=%d rebuild_hash_list=%p\n",
6366 hibernate_rebuild_needed,
6367 hibernate_rebuild_hash_list);
6368 }
6369 #endif /* HIBERNATION */
6370
6371 #if VM_PAGE_FAKE_BUCKETS
6372 char *cp;
6373 for (cp = (char *) vm_page_fake_buckets_start;
6374 cp < (char *) vm_page_fake_buckets_end;
6375 cp++) {
6376 if (*cp != 0x5a) {
6377 panic("BUCKET_CHECK: corruption at %p in fake buckets "
6378 "[0x%llx:0x%llx]\n",
6379 cp,
6380 vm_page_fake_buckets_start,
6381 vm_page_fake_buckets_end);
6382 }
6383 }
6384 #endif /* VM_PAGE_FAKE_BUCKETS */
6385
6386 for (i = 0; i < vm_page_bucket_count; i++) {
6387 bucket = &vm_page_buckets[i];
6388 if (bucket->pages == VM_PAGE_NULL) {
6389 continue;
6390 }
6391
6392 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6393 lck_spin_lock(bucket_lock);
6394 p = bucket->pages;
6395 while (p != VM_PAGE_NULL) {
6396 p_hash = vm_page_hash(p->object, p->offset);
6397 if (!p->hashed) {
6398 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
6399 "hash %d in bucket %d at %p "
6400 "is not hashed\n",
6401 p, p->object, p->offset,
6402 p_hash, i, bucket);
6403 }
6404 if (p_hash != i) {
6405 panic("BUCKET_CHECK: corruption in bucket %d "
6406 "at %p: page %p object %p offset 0x%llx "
6407 "hash %d\n",
6408 i, bucket, p, p->object, p->offset,
6409 p_hash);
6410 }
6411 p = p->next;
6412 }
6413 lck_spin_unlock(bucket_lock);
6414 }
6415
6416 // printf("BUCKET_CHECK: checked buckets\n");
6417 }
6418 #endif /* VM_PAGE_BUCKETS_CHECK */