apple/xnu.git: osfmk/vm/vm_resident.c (blob 32271953ab9f3cb2be1f2ada190593db18f6a6fa)
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <pexpert/pexpert.h>
89
90 #include <vm/vm_protos.h>
91 #include <vm/memory_object.h>
92 #include <vm/vm_purgeable_internal.h>
93 #include <vm/vm_compressor.h>
94
95 #include <IOKit/IOHibernatePrivate.h>
96
97 #include <sys/kdebug.h>
98
99 boolean_t hibernate_cleaning_in_progress = FALSE;
100 boolean_t vm_page_free_verify = TRUE;
101
102 uint32_t vm_lopage_free_count = 0;
103 uint32_t vm_lopage_free_limit = 0;
104 uint32_t vm_lopage_lowater = 0;
105 boolean_t vm_lopage_refill = FALSE;
106 boolean_t vm_lopage_needed = FALSE;
107
108 lck_mtx_ext_t vm_page_queue_lock_ext;
109 lck_mtx_ext_t vm_page_queue_free_lock_ext;
110 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
111
112 int speculative_age_index = 0;
113 int speculative_steal_index = 0;
114 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
115
116
117 __private_extern__ void vm_page_init_lck_grp(void);
118
119 static void vm_page_free_prepare(vm_page_t page);
120 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
121
122
123
124
125 /*
126 * Associated with each page of user-allocatable memory is a
127 * page structure.
128 */
129
130 /*
131 * These variables record the values returned by vm_page_bootstrap,
132 * for debugging purposes. The implementation of pmap_steal_memory
133 * and pmap_startup here also uses them internally.
134 */
135
136 vm_offset_t virtual_space_start;
137 vm_offset_t virtual_space_end;
138 uint32_t vm_page_pages;
139
140 /*
141 * The vm_page_lookup() routine, which provides for fast
142 * (virtual memory object, offset) to page lookup, employs
143 * the following hash table. The vm_page_{insert,remove}
144 * routines install and remove associations in the table.
145 * [This table is often called the virtual-to-physical,
146 * or VP, table.]
147 */
148 typedef struct {
149 vm_page_t pages;
150 #if MACH_PAGE_HASH_STATS
151 int cur_count; /* current count */
152 int hi_count; /* high water mark */
153 #endif /* MACH_PAGE_HASH_STATS */
154 } vm_page_bucket_t;
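/*
 * Each bucket is a singly linked list of vm_page structures chained
 * through the page's "next" field: vm_page_insert_internal() pushes new
 * entries at the head, and vm_page_lookup() / vm_page_remove() walk the
 * chain under the bucket's spin lock.
 */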
155
156
157 #define BUCKETS_PER_LOCK 16
158
159 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
160 unsigned int vm_page_bucket_count = 0; /* How big is array? */
161 unsigned int vm_page_hash_mask; /* Mask for hash function */
162 unsigned int vm_page_hash_shift; /* Shift for hash function */
163 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
164 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
165
166 lck_spin_t *vm_page_bucket_locks;
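/*
 * Each lock in vm_page_bucket_locks covers BUCKETS_PER_LOCK consecutive
 * buckets.  For example, with BUCKETS_PER_LOCK == 16, buckets 0..15 share
 * vm_page_bucket_locks[0] and buckets 16..31 share vm_page_bucket_locks[1];
 * callers select the lock with (hash_id / BUCKETS_PER_LOCK), as in
 * vm_page_insert_internal() and vm_page_lookup() below.
 */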
167
168 #if VM_PAGE_BUCKETS_CHECK
169 boolean_t vm_page_buckets_check_ready = FALSE;
170 #if VM_PAGE_FAKE_BUCKETS
171 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
172 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
173 #endif /* VM_PAGE_FAKE_BUCKETS */
174 #endif /* VM_PAGE_BUCKETS_CHECK */
175
176 #if MACH_PAGE_HASH_STATS
177 /* This routine is only for debug. It is intended to be called by
178 * hand by a developer using a kernel debugger. This routine prints
179 * out vm_page_hash table statistics to the kernel debug console.
180 */
181 void
182 hash_debug(void)
183 {
184 int i;
185 int numbuckets = 0;
186 int highsum = 0;
187 int maxdepth = 0;
188
189 for (i = 0; i < vm_page_bucket_count; i++) {
190 if (vm_page_buckets[i].hi_count) {
191 numbuckets++;
192 highsum += vm_page_buckets[i].hi_count;
193 if (vm_page_buckets[i].hi_count > maxdepth)
194 maxdepth = vm_page_buckets[i].hi_count;
195 }
196 }
197 printf("Total number of buckets: %d\n", vm_page_bucket_count);
198 printf("Number used buckets: %d = %d%%\n",
199 numbuckets, 100*numbuckets/vm_page_bucket_count);
200 printf("Number unused buckets: %d = %d%%\n",
201 vm_page_bucket_count - numbuckets,
202 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
203 printf("Sum of bucket max depth: %d\n", highsum);
204 printf("Average bucket depth: %d.%2d\n",
205 highsum/vm_page_bucket_count,
206 highsum%vm_page_bucket_count);
207 printf("Maximum bucket depth: %d\n", maxdepth);
208 }
209 #endif /* MACH_PAGE_HASH_STATS */
210
211 /*
212 * The virtual page size is currently implemented as a runtime
213 * variable, but is constant once initialized using vm_set_page_size.
214 * This initialization must be done in the machine-dependent
215 * bootstrap sequence, before calling other machine-independent
216 * initializations.
217 *
218 * All references to the virtual page size outside this
219 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
220 * constants.
221 */
222 vm_size_t page_size = PAGE_SIZE;
223 vm_size_t page_mask = PAGE_MASK;
224 int page_shift = PAGE_SHIFT;
225
226 /*
227 * Resident page structures are initialized from
228 * a template (see vm_page_alloc).
229 *
230 * When adding a new field to the virtual memory
231 * object structure, be sure to add initialization
232 * (see vm_page_bootstrap).
233 */
234 struct vm_page vm_page_template;
235
236 vm_page_t vm_pages = VM_PAGE_NULL;
237 unsigned int vm_pages_count = 0;
238 ppnum_t vm_page_lowest = 0;
239
240 /*
241 * Resident pages that represent real memory
242 * are allocated from a set of free lists,
243 * one per color.
244 */
245 unsigned int vm_colors;
246 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
247 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
248 queue_head_t vm_page_queue_free[MAX_COLORS];
249 unsigned int vm_page_free_wanted;
250 unsigned int vm_page_free_wanted_privileged;
251 unsigned int vm_page_free_count;
252 unsigned int vm_page_fictitious_count;
253
254 unsigned int vm_page_free_count_minimum; /* debugging */
255
256 /*
257 * Occasionally, the virtual memory system uses
258 * resident page structures that do not refer to
259 * real pages, for example to leave a page with
260 * important state information in the VP table.
261 *
262 * These page structures are allocated the way
263 * most other kernel structures are.
264 */
265 zone_t vm_page_zone;
266 vm_locks_array_t vm_page_locks;
267 decl_lck_mtx_data(,vm_page_alloc_lock)
268 lck_mtx_ext_t vm_page_alloc_lock_ext;
269
270 unsigned int io_throttle_zero_fill;
271
272 unsigned int vm_page_local_q_count = 0;
273 unsigned int vm_page_local_q_soft_limit = 250;
274 unsigned int vm_page_local_q_hard_limit = 500;
275 struct vplq *vm_page_local_q = NULL;
276
277 /* N.B. Guard and fictitious pages must not
278 * be assigned a zero phys_page value.
279 */
280 /*
281 * Fictitious pages don't have a physical address,
282 * but we must initialize phys_page to something.
283 * For debugging, this should be a strange value
284 * that the pmap module can recognize in assertions.
285 */
286 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
287
288 /*
289 * Guard pages are not accessible so they don't
290 * need a physical address, but we need to enter
291 * one in the pmap.
292 * Let's make it recognizable and make sure that
293 * we don't use a real physical page with that
294 * physical address.
295 */
296 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
297
298 /*
299 * Resident page structures are also chained on
300 * queues that are used by the page replacement
301 * system (pageout daemon). These queues are
302 * defined here, but are shared by the pageout
303 * module. The inactive queue is broken into
304 * file-backed and anonymous queues for convenience, as the
305 * pageout daemon often assigns a higher
306 * importance to anonymous pages (they are less likely to be picked).
307 */
308 queue_head_t vm_page_queue_active;
309 queue_head_t vm_page_queue_inactive;
310 queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
311 queue_head_t vm_page_queue_throttled;
312
313 unsigned int vm_page_active_count;
314 unsigned int vm_page_inactive_count;
315 unsigned int vm_page_anonymous_count;
316 unsigned int vm_page_throttled_count;
317 unsigned int vm_page_speculative_count;
318 unsigned int vm_page_wire_count;
319 unsigned int vm_page_wire_count_initial;
320 unsigned int vm_page_gobble_count = 0;
321 unsigned int vm_page_wire_count_warning = 0;
322 unsigned int vm_page_gobble_count_warning = 0;
323
324 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
325 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
326 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
327
328 unsigned int vm_page_external_count = 0;
329 unsigned int vm_page_internal_count = 0;
330 unsigned int vm_page_pageable_external_count = 0;
331 unsigned int vm_page_pageable_internal_count = 0;
332
333 #if DEVELOPMENT || DEBUG
334 unsigned int vm_page_speculative_recreated = 0;
335 unsigned int vm_page_speculative_created = 0;
336 unsigned int vm_page_speculative_used = 0;
337 #endif
338
339 queue_head_t vm_page_queue_cleaned;
340
341 unsigned int vm_page_cleaned_count = 0;
342 unsigned int vm_pageout_enqueued_cleaned = 0;
343
344 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
345 ppnum_t max_valid_low_ppnum = 0xffffffff;
346
347
348 /*
349 * Several page replacement parameters are also
350 * shared with this module, so that page allocation
351 * (done here in vm_page_alloc) can trigger the
352 * pageout daemon.
353 */
354 unsigned int vm_page_free_target = 0;
355 unsigned int vm_page_free_min = 0;
356 unsigned int vm_page_throttle_limit = 0;
357 uint32_t vm_page_creation_throttle = 0;
358 unsigned int vm_page_inactive_target = 0;
359 unsigned int vm_page_anonymous_min = 0;
360 unsigned int vm_page_inactive_min = 0;
361 unsigned int vm_page_free_reserved = 0;
362 unsigned int vm_page_throttle_count = 0;
363
364
365 /*
366 * The VM system has a couple of heuristics for deciding
367 * that pages are "uninteresting" and should be placed
368 * on the inactive queue as likely candidates for replacement.
369 * These variables let the heuristics be controlled at run-time
370 * to make experimentation easier.
371 */
372
373 boolean_t vm_page_deactivate_hint = TRUE;
374
375 struct vm_page_stats_reusable vm_page_stats_reusable;
376
377 /*
378 * vm_set_page_size:
379 *
380 * Sets the page size, perhaps based upon the memory
381 * size. Must be called before any use of page-size
382 * dependent functions.
383 *
384 * Sets page_shift and page_mask from page_size.
385 */
386 void
387 vm_set_page_size(void)
388 {
389 page_mask = page_size - 1;
390
391 if ((page_mask & page_size) != 0)
392 panic("vm_set_page_size: page size not a power of two");
393
394 for (page_shift = 0; ; page_shift++)
395 if ((1U << page_shift) == page_size)
396 break;
397 }
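/*
 * Illustrative values (the real page size is machine-dependent): with
 * page_size == 4096, vm_set_page_size() leaves page_mask == 0xFFF and
 * page_shift == 12, so trunc_page()/round_page() reduce to masking with
 * ~page_mask and atop()-style conversions to a right shift by page_shift.
 */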
398
399
400 /* Called once during startup, once the cache geometry is known.
401 */
402 static void
403 vm_page_set_colors( void )
404 {
405 unsigned int n, override;
406
407 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
408 n = override;
409 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
410 n = vm_cache_geometry_colors;
411 else n = DEFAULT_COLORS; /* use default if all else fails */
412
413 if ( n == 0 )
414 n = 1;
415 if ( n > MAX_COLORS )
416 n = MAX_COLORS;
417
418 /* the count must be a power of 2 */
419 if ( ( n & (n - 1)) != 0 )
420 panic("vm_page_set_colors");
421
422 vm_colors = n;
423 vm_color_mask = n - 1;
424 }
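/*
 * Worked example, assuming the free queues are indexed by
 * (phys_page & vm_color_mask) as in vm_page_release(): if the hardware
 * reports 32 cache colors, vm_colors == 32 and vm_color_mask == 0x1f, so
 * physical pages 0, 32, 64, ... all land on vm_page_queue_free[0] and
 * consecutive physical pages are spread across all 32 queues.
 */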
425
426
427 lck_grp_t vm_page_lck_grp_free;
428 lck_grp_t vm_page_lck_grp_queue;
429 lck_grp_t vm_page_lck_grp_local;
430 lck_grp_t vm_page_lck_grp_purge;
431 lck_grp_t vm_page_lck_grp_alloc;
432 lck_grp_t vm_page_lck_grp_bucket;
433 lck_grp_attr_t vm_page_lck_grp_attr;
434 lck_attr_t vm_page_lck_attr;
435
436
437 __private_extern__ void
438 vm_page_init_lck_grp(void)
439 {
440 /*
441 * initialize the vm_page lock world
442 */
443 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
444 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
445 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
446 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
447 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
448 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
449 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
450 lck_attr_setdefault(&vm_page_lck_attr);
451 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
452
453 vm_compressor_init_locks();
454 }
455
456 void
457 vm_page_init_local_q()
458 {
459 unsigned int num_cpus;
460 unsigned int i;
461 struct vplq *t_local_q;
462
463 num_cpus = ml_get_max_cpus();
464
465 /*
466 * no point in this for a uni-processor system
467 */
468 if (num_cpus >= 2) {
469 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
470
471 for (i = 0; i < num_cpus; i++) {
472 struct vpl *lq;
473
474 lq = &t_local_q[i].vpl_un.vpl;
475 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
476 queue_init(&lq->vpl_queue);
477 lq->vpl_count = 0;
478 lq->vpl_internal_count = 0;
479 lq->vpl_external_count = 0;
480 }
481 vm_page_local_q_count = num_cpus;
482
483 vm_page_local_q = (struct vplq *)t_local_q;
484 }
485 }
486
487
488 /*
489 * vm_page_bootstrap:
490 *
491 * Initializes the resident memory module.
492 *
493 * Allocates memory for the page cells, and
494 * for the object/offset-to-page hash table headers.
495 * Each page cell is initialized and placed on the free list.
496 * Returns the range of available kernel virtual memory.
497 */
498
499 void
500 vm_page_bootstrap(
501 vm_offset_t *startp,
502 vm_offset_t *endp)
503 {
504 register vm_page_t m;
505 unsigned int i;
506 unsigned int log1;
507 unsigned int log2;
508 unsigned int size;
509
510 /*
511 * Initialize the vm_page template.
512 */
513
514 m = &vm_page_template;
515 bzero(m, sizeof (*m));
516
517 m->pageq.next = NULL;
518 m->pageq.prev = NULL;
519 m->listq.next = NULL;
520 m->listq.prev = NULL;
521 m->next = VM_PAGE_NULL;
522
523 m->object = VM_OBJECT_NULL; /* reset later */
524 m->offset = (vm_object_offset_t) -1; /* reset later */
525
526 m->wire_count = 0;
527 m->local = FALSE;
528 m->inactive = FALSE;
529 m->active = FALSE;
530 m->pageout_queue = FALSE;
531 m->speculative = FALSE;
532 m->laundry = FALSE;
533 m->free = FALSE;
534 m->reference = FALSE;
535 m->gobbled = FALSE;
536 m->private = FALSE;
537 m->throttled = FALSE;
538 m->__unused_pageq_bits = 0;
539
540 m->phys_page = 0; /* reset later */
541
542 m->busy = TRUE;
543 m->wanted = FALSE;
544 m->tabled = FALSE;
545 m->hashed = FALSE;
546 m->fictitious = FALSE;
547 m->pmapped = FALSE;
548 m->wpmapped = FALSE;
549 m->pageout = FALSE;
550 m->absent = FALSE;
551 m->error = FALSE;
552 m->dirty = FALSE;
553 m->cleaning = FALSE;
554 m->precious = FALSE;
555 m->clustered = FALSE;
556 m->overwriting = FALSE;
557 m->restart = FALSE;
558 m->unusual = FALSE;
559 m->encrypted = FALSE;
560 m->encrypted_cleaning = FALSE;
561 m->cs_validated = FALSE;
562 m->cs_tainted = FALSE;
563 m->no_cache = FALSE;
564 m->reusable = FALSE;
565 m->slid = FALSE;
566 m->was_dirty = FALSE;
567 m->xpmapped = FALSE;
568 m->compressor = FALSE;
569 m->written_by_kernel = FALSE;
570 m->__unused_object_bits = 0;
571
572 /*
573 * Initialize the page queues.
574 */
575 vm_page_init_lck_grp();
576
577 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
578 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
579 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
580
581 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
582 int group;
583
584 purgeable_queues[i].token_q_head = 0;
585 purgeable_queues[i].token_q_tail = 0;
586 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
587 queue_init(&purgeable_queues[i].objq[group]);
588
589 purgeable_queues[i].type = i;
590 purgeable_queues[i].new_pages = 0;
591 #if MACH_ASSERT
592 purgeable_queues[i].debug_count_tokens = 0;
593 purgeable_queues[i].debug_count_objects = 0;
594 #endif
595 };
596
597 for (i = 0; i < MAX_COLORS; i++ )
598 queue_init(&vm_page_queue_free[i]);
599
600 queue_init(&vm_lopage_queue_free);
601 queue_init(&vm_page_queue_active);
602 queue_init(&vm_page_queue_inactive);
603 queue_init(&vm_page_queue_cleaned);
604 queue_init(&vm_page_queue_throttled);
605 queue_init(&vm_page_queue_anonymous);
606
607 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
608 queue_init(&vm_page_queue_speculative[i].age_q);
609
610 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
611 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
612 }
613 vm_page_free_wanted = 0;
614 vm_page_free_wanted_privileged = 0;
615
616 vm_page_set_colors();
617
618
619 /*
620 * Steal memory for the map and zone subsystems.
621 */
622 zone_steal_memory();
623 vm_map_steal_memory();
624
625 /*
626 * Allocate (and initialize) the virtual-to-physical
627 * table hash buckets.
628 *
629 * The number of buckets should be a power of two to
630 * get a good hash function. The following computation
631 * chooses the first power of two that is greater
632 * than the number of physical pages in the system.
633 */
634
635 if (vm_page_bucket_count == 0) {
636 unsigned int npages = pmap_free_pages();
637
638 vm_page_bucket_count = 1;
639 while (vm_page_bucket_count < npages)
640 vm_page_bucket_count <<= 1;
641 }
642 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
643
644 vm_page_hash_mask = vm_page_bucket_count - 1;
645
646 /*
647 * Calculate object shift value for hashing algorithm:
648 * O = log2(sizeof(struct vm_object))
649 * B = log2(vm_page_bucket_count)
650 * hash shifts the object left by
651 * B/2 - O
652 */
653 size = vm_page_bucket_count;
654 for (log1 = 0; size > 1; log1++)
655 size /= 2;
656 size = sizeof(struct vm_object);
657 for (log2 = 0; size > 1; log2++)
658 size /= 2;
659 vm_page_hash_shift = log1/2 - log2 + 1;
660
661 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
662 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
663 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
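	/*
	 * Illustrative arithmetic (both quantities are machine-dependent):
	 * with vm_page_bucket_count == 1<<17, log1 == 17; if
	 * sizeof(struct vm_object) falls in [256, 512), log2 == 8, so
	 * vm_page_hash_shift == 17/2 - 8 + 1 == 1 and
	 * vm_page_bucket_hash == (1<<9) | (1<<4) | 1 == 0x211.
	 */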
664
665 if (vm_page_hash_mask & vm_page_bucket_count)
666 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
667
668 #if VM_PAGE_BUCKETS_CHECK
669 #if VM_PAGE_FAKE_BUCKETS
670 /*
671 * Allocate a decoy set of page buckets, to detect
672 * any stomping there.
673 */
674 vm_page_fake_buckets = (vm_page_bucket_t *)
675 pmap_steal_memory(vm_page_bucket_count *
676 sizeof(vm_page_bucket_t));
677 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
678 vm_page_fake_buckets_end =
679 vm_map_round_page((vm_page_fake_buckets_start +
680 (vm_page_bucket_count *
681 sizeof (vm_page_bucket_t))),
682 PAGE_MASK);
683 char *cp;
684 for (cp = (char *)vm_page_fake_buckets_start;
685 cp < (char *)vm_page_fake_buckets_end;
686 cp++) {
687 *cp = 0x5a;
688 }
689 #endif /* VM_PAGE_FAKE_BUCKETS */
690 #endif /* VM_PAGE_BUCKETS_CHECK */
691
692 vm_page_buckets = (vm_page_bucket_t *)
693 pmap_steal_memory(vm_page_bucket_count *
694 sizeof(vm_page_bucket_t));
695
696 vm_page_bucket_locks = (lck_spin_t *)
697 pmap_steal_memory(vm_page_bucket_lock_count *
698 sizeof(lck_spin_t));
699
700 for (i = 0; i < vm_page_bucket_count; i++) {
701 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
702
703 bucket->pages = VM_PAGE_NULL;
704 #if MACH_PAGE_HASH_STATS
705 bucket->cur_count = 0;
706 bucket->hi_count = 0;
707 #endif /* MACH_PAGE_HASH_STATS */
708 }
709
710 for (i = 0; i < vm_page_bucket_lock_count; i++)
711 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
712
713 #if VM_PAGE_BUCKETS_CHECK
714 vm_page_buckets_check_ready = TRUE;
715 #endif /* VM_PAGE_BUCKETS_CHECK */
716
717 /*
718 * Machine-dependent code allocates the resident page table.
719 * It uses vm_page_init to initialize the page frames.
720 * The code also returns to us the virtual space available
721 * to the kernel. We don't trust the pmap module
722 * to get the alignment right.
723 */
724
725 pmap_startup(&virtual_space_start, &virtual_space_end);
726 virtual_space_start = round_page(virtual_space_start);
727 virtual_space_end = trunc_page(virtual_space_end);
728
729 *startp = virtual_space_start;
730 *endp = virtual_space_end;
731
732 /*
733 * Compute the initial "wire" count.
734 * Up until now, the pages which have been set aside are not under
735 * the VM system's control, so although they aren't explicitly
736 * wired, they nonetheless can't be moved. At this moment,
737 * all VM managed pages are "free", courtesy of pmap_startup.
738 */
739 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
740 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
741 vm_page_wire_count_initial = vm_page_wire_count;
742 vm_page_free_count_minimum = vm_page_free_count;
743
744 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
745 vm_page_free_count, vm_page_wire_count);
746
747 simple_lock_init(&vm_paging_lock, 0);
748 }
749
750 #ifndef MACHINE_PAGES
751 /*
752 * We implement pmap_steal_memory and pmap_startup with the help
753 * of two simpler functions, pmap_virtual_space and pmap_next_page.
754 */
755
756 void *
757 pmap_steal_memory(
758 vm_size_t size)
759 {
760 vm_offset_t addr, vaddr;
761 ppnum_t phys_page;
762
763 /*
764 * We round the size up to a multiple of sizeof(void *).
765 */
766
767 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
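	/*
	 * e.g. with 8-byte pointers a request for 20 bytes is rounded up
	 * to 24, while a request that is already a multiple of 8 is left
	 * unchanged.
	 */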
768
769 /*
770 * If this is the first call to pmap_steal_memory,
771 * we have to initialize ourself.
772 */
773
774 if (virtual_space_start == virtual_space_end) {
775 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
776
777 /*
778 * The initial values must be aligned properly, and
779 * we don't trust the pmap module to do it right.
780 */
781
782 virtual_space_start = round_page(virtual_space_start);
783 virtual_space_end = trunc_page(virtual_space_end);
784 }
785
786 /*
787 * Allocate virtual memory for this request.
788 */
789
790 addr = virtual_space_start;
791 virtual_space_start += size;
792
793 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
794
795 /*
796 * Allocate and map physical pages to back new virtual pages.
797 */
798
799 for (vaddr = round_page(addr);
800 vaddr < addr + size;
801 vaddr += PAGE_SIZE) {
802
803 if (!pmap_next_page_hi(&phys_page))
804 panic("pmap_steal_memory");
805
806 /*
807 * XXX Logically, these mappings should be wired,
808 * but some pmap modules barf if they are.
809 */
810 #if defined(__LP64__)
811 pmap_pre_expand(kernel_pmap, vaddr);
812 #endif
813
814 pmap_enter(kernel_pmap, vaddr, phys_page,
815 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
816 VM_WIMG_USE_DEFAULT, FALSE);
817 /*
818 * Account for newly stolen memory
819 */
820 vm_page_wire_count++;
821
822 }
823
824 return (void *) addr;
825 }
826
827 void
828 pmap_startup(
829 vm_offset_t *startp,
830 vm_offset_t *endp)
831 {
832 unsigned int i, npages, pages_initialized, fill, fillval;
833 ppnum_t phys_page;
834 addr64_t tmpaddr;
835
836 /*
837 * We calculate how many page frames we will have
838 * and then allocate the page structures in one chunk.
839 */
840
841 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
842 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
843 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Number of pages we can manage, charging each one for its vm_page_t as well */
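	/*
	 * Illustrative sizing (sizeof(struct vm_page) is shown here as 64
	 * bytes purely for the arithmetic): each managed page then costs
	 * PAGE_SIZE + 64 == 4160 bytes of the remaining memory, so roughly
	 * 1/65th of that memory goes to the vm_pages array itself.
	 */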
844
845 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
846
847 /*
848 * Initialize the page frames.
849 */
850 for (i = 0, pages_initialized = 0; i < npages; i++) {
851 if (!pmap_next_page(&phys_page))
852 break;
853 if (pages_initialized == 0 || phys_page < vm_page_lowest)
854 vm_page_lowest = phys_page;
855
856 vm_page_init(&vm_pages[i], phys_page, FALSE);
857 vm_page_pages++;
858 pages_initialized++;
859 }
860 vm_pages_count = pages_initialized;
861
862 /*
863 * Check if we want to initialize pages to a known value
864 */
865 fill = 0; /* Assume no fill */
866 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
867 #if DEBUG
868 /* This slows down booting the DEBUG kernel, particularly on
869 * large memory systems, but is worthwhile in deterministically
870 * trapping uninitialized memory usage.
871 */
872 if (fill == 0) {
873 fill = 1;
874 fillval = 0xDEB8F177;
875 }
876 #endif
877 if (fill)
878 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
879 // -debug code remove
880 if (2 == vm_himemory_mode) {
881 // free low -> high so high is preferred
882 for (i = 1; i <= pages_initialized; i++) {
883 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
884 vm_page_release(&vm_pages[i - 1]);
885 }
886 }
887 else
888 // debug code remove-
889
890 /*
891 * Release pages in reverse order so that physical pages
892 * initially get allocated in ascending addresses. This keeps
893 * the devices (which must address physical memory) happy if
894 * they require several consecutive pages.
895 */
896 for (i = pages_initialized; i > 0; i--) {
897 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
898 vm_page_release(&vm_pages[i - 1]);
899 }
900
901 #if 0
902 {
903 vm_page_t xx, xxo, xxl;
904 int i, j, k, l;
905
906 j = 0; /* (BRINGUP) */
907 xxl = 0;
908
909 for( i = 0; i < vm_colors; i++ ) {
910 queue_iterate(&vm_page_queue_free[i],
911 xx,
912 vm_page_t,
913 pageq) { /* BRINGUP */
914 j++; /* (BRINGUP) */
915 if(j > vm_page_free_count) { /* (BRINGUP) */
916 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
917 }
918
919 l = vm_page_free_count - j; /* (BRINGUP) */
920 k = 0; /* (BRINGUP) */
921
922 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
923
924 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
925 k++;
926 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
927 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
928 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
929 }
930 }
931
932 xxl = xx;
933 }
934 }
935
936 if(j != vm_page_free_count) { /* (BRINGUP) */
937 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
938 }
939 }
940 #endif
941
942
943 /*
944 * We have to re-align virtual_space_start,
945 * because pmap_steal_memory has been using it.
946 */
947
948 virtual_space_start = round_page(virtual_space_start);
949
950 *startp = virtual_space_start;
951 *endp = virtual_space_end;
952 }
953 #endif /* MACHINE_PAGES */
954
955 /*
956 * Routine: vm_page_module_init
957 * Purpose:
958 * Second initialization pass, to be done after
959 * the basic VM system is ready.
960 */
961 void
962 vm_page_module_init(void)
963 {
964 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
965 0, PAGE_SIZE, "vm pages");
966
967 #if ZONE_DEBUG
968 zone_debug_disable(vm_page_zone);
969 #endif /* ZONE_DEBUG */
970
971 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
972 zone_change(vm_page_zone, Z_EXPAND, FALSE);
973 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
974 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
975 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
976 /*
977 * Adjust zone statistics to account for the real pages allocated
978 * in vm_page_create(). [Q: is this really what we want?]
979 */
980 vm_page_zone->count += vm_page_pages;
981 vm_page_zone->sum_count += vm_page_pages;
982 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
983 }
984
985 /*
986 * Routine: vm_page_create
987 * Purpose:
988 * After the VM system is up, machine-dependent code
989 * may stumble across more physical memory. For example,
990 * memory that it was reserving for a frame buffer.
991 * vm_page_create turns this memory into available pages.
992 */
993
994 void
995 vm_page_create(
996 ppnum_t start,
997 ppnum_t end)
998 {
999 ppnum_t phys_page;
1000 vm_page_t m;
1001
1002 for (phys_page = start;
1003 phys_page < end;
1004 phys_page++) {
1005 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1006 == VM_PAGE_NULL)
1007 vm_page_more_fictitious();
1008
1009 m->fictitious = FALSE;
1010 pmap_clear_noencrypt(phys_page);
1011
1012 vm_page_pages++;
1013 vm_page_release(m);
1014 }
1015 }
1016
1017 /*
1018 * vm_page_hash:
1019 *
1020 * Distributes the object/offset key pair among hash buckets.
1021 *
1022 * NOTE: The bucket count must be a power of 2
1023 */
1024 #define vm_page_hash(object, offset) (\
1025 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1026 & vm_page_hash_mask)
1027
1028
1029 /*
1030 * vm_page_insert: [ internal use only ]
1031 *
1032 * Inserts the given mem entry into the object/object-page
1033 * table and object list.
1034 *
1035 * The object must be locked.
1036 */
1037 void
1038 vm_page_insert(
1039 vm_page_t mem,
1040 vm_object_t object,
1041 vm_object_offset_t offset)
1042 {
1043 vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
1044 }
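/*
 * Typical call sequence (a sketch, not the only valid one): the caller
 * holds the object lock exclusively and does not hold the page queues
 * lock, so the internal form runs with queues_lock_held == FALSE:
 *
 *	vm_object_lock(object);
 *	vm_page_insert(mem, object, offset);
 *	vm_object_unlock(object);
 */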
1045
1046 void
1047 vm_page_insert_internal(
1048 vm_page_t mem,
1049 vm_object_t object,
1050 vm_object_offset_t offset,
1051 boolean_t queues_lock_held,
1052 boolean_t insert_in_hash,
1053 boolean_t batch_pmap_op)
1054 {
1055 vm_page_bucket_t *bucket;
1056 lck_spin_t *bucket_lock;
1057 int hash_id;
1058
1059 XPR(XPR_VM_PAGE,
1060 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1061 object, offset, mem, 0,0);
1062 #if 0
1063 /*
1064 * we may not hold the page queue lock
1065 * so this check isn't safe to make
1066 */
1067 VM_PAGE_CHECK(mem);
1068 #endif
1069
1070 assert(page_aligned(offset));
1071
1072 if (object == vm_submap_object) {
1073 /* the vm_submap_object is only a placeholder for submaps */
1074 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1075 }
1076
1077 vm_object_lock_assert_exclusive(object);
1078 #if DEBUG
1079 lck_mtx_assert(&vm_page_queue_lock,
1080 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1081 : LCK_MTX_ASSERT_NOTOWNED);
1082 #endif /* DEBUG */
1083
1084 if (insert_in_hash == TRUE) {
1085 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1086 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1087 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1088 "already in (obj=%p,off=0x%llx)",
1089 mem, object, offset, mem->object, mem->offset);
1090 #endif
1091 assert(!object->internal || offset < object->vo_size);
1092
1093 /* only insert "pageout" pages into "pageout" objects,
1094 * and normal pages into normal objects */
1095 assert(object->pageout == mem->pageout);
1096
1097 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1098
1099 /*
1100 * Record the object/offset pair in this page
1101 */
1102
1103 mem->object = object;
1104 mem->offset = offset;
1105
1106 /*
1107 * Insert it into the object/offset hash table
1108 */
1109 hash_id = vm_page_hash(object, offset);
1110 bucket = &vm_page_buckets[hash_id];
1111 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1112
1113 lck_spin_lock(bucket_lock);
1114
1115 mem->next = bucket->pages;
1116 bucket->pages = mem;
1117 #if MACH_PAGE_HASH_STATS
1118 if (++bucket->cur_count > bucket->hi_count)
1119 bucket->hi_count = bucket->cur_count;
1120 #endif /* MACH_PAGE_HASH_STATS */
1121 mem->hashed = TRUE;
1122 lck_spin_unlock(bucket_lock);
1123 }
1124
1125 {
1126 unsigned int cache_attr;
1127
1128 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1129
1130 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1131 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1132 }
1133 }
1134 /*
1135 * Now link into the object's list of backed pages.
1136 */
1137 VM_PAGE_INSERT(mem, object);
1138 mem->tabled = TRUE;
1139
1140 /*
1141 * Show that the object has one more resident page.
1142 */
1143
1144 object->resident_page_count++;
1145 if (VM_PAGE_WIRED(mem)) {
1146 object->wired_page_count++;
1147 }
1148 assert(object->resident_page_count >= object->wired_page_count);
1149
1150 if (object->internal) {
1151 OSAddAtomic(1, &vm_page_internal_count);
1152 } else {
1153 OSAddAtomic(1, &vm_page_external_count);
1154 }
1155
1156 /*
1157 * It wouldn't make sense to insert a "reusable" page in
1158 * an object (the page would have been marked "reusable" only
1159 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1160 * in the object at that time).
1161 * But a page could be inserted in an "all_reusable" object, if
1162 * something faults it in (a vm_read() from another task or a
1163 * "use-after-free" issue in user space, for example). It can
1164 * also happen if we're relocating a page from that object to
1165 * a different physical page during a physically-contiguous
1166 * allocation.
1167 */
1168 assert(!mem->reusable);
1169 if (mem->object->all_reusable) {
1170 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1171 }
1172
1173 if (object->purgable == VM_PURGABLE_VOLATILE) {
1174 if (VM_PAGE_WIRED(mem)) {
1175 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1176 } else {
1177 OSAddAtomic(1, &vm_page_purgeable_count);
1178 }
1179 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1180 mem->throttled) {
1181 /*
1182 * This page belongs to a purged VM object but hasn't
1183 * been purged (because it was "busy").
1184 * It's in the "throttled" queue and hence not
1185 * visible to vm_pageout_scan(). Move it to a pageable
1186 * queue, so that it can eventually be reclaimed, instead
1187 * of lingering in the "empty" object.
1188 */
1189 if (queues_lock_held == FALSE)
1190 vm_page_lockspin_queues();
1191 vm_page_deactivate(mem);
1192 if (queues_lock_held == FALSE)
1193 vm_page_unlock_queues();
1194 }
1195 }
1196
1197 /*
1198 * vm_page_replace:
1199 *
1200 * Exactly like vm_page_insert, except that we first
1201 * remove any existing page at the given offset in object.
1202 *
1203 * The object must be locked.
1204 */
1205 void
1206 vm_page_replace(
1207 register vm_page_t mem,
1208 register vm_object_t object,
1209 register vm_object_offset_t offset)
1210 {
1211 vm_page_bucket_t *bucket;
1212 vm_page_t found_m = VM_PAGE_NULL;
1213 lck_spin_t *bucket_lock;
1214 int hash_id;
1215
1216 #if 0
1217 /*
1218 * we don't hold the page queue lock
1219 * so this check isn't safe to make
1220 */
1221 VM_PAGE_CHECK(mem);
1222 #endif
1223 vm_object_lock_assert_exclusive(object);
1224 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1225 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1226 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1227 "already in (obj=%p,off=0x%llx)",
1228 mem, object, offset, mem->object, mem->offset);
1229 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1230 #endif
1231 /*
1232 * Record the object/offset pair in this page
1233 */
1234
1235 mem->object = object;
1236 mem->offset = offset;
1237
1238 /*
1239 * Insert it into the object/offset hash table,
1240 * replacing any page that might have been there.
1241 */
1242
1243 hash_id = vm_page_hash(object, offset);
1244 bucket = &vm_page_buckets[hash_id];
1245 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1246
1247 lck_spin_lock(bucket_lock);
1248
1249 if (bucket->pages) {
1250 vm_page_t *mp = &bucket->pages;
1251 vm_page_t m = *mp;
1252
1253 do {
1254 if (m->object == object && m->offset == offset) {
1255 /*
1256 * Remove old page from hash list
1257 */
1258 *mp = m->next;
1259 m->hashed = FALSE;
1260
1261 found_m = m;
1262 break;
1263 }
1264 mp = &m->next;
1265 } while ((m = *mp));
1266
1267 mem->next = bucket->pages;
1268 } else {
1269 mem->next = VM_PAGE_NULL;
1270 }
1271 /*
1272 * insert new page at head of hash list
1273 */
1274 bucket->pages = mem;
1275 mem->hashed = TRUE;
1276
1277 lck_spin_unlock(bucket_lock);
1278
1279 if (found_m) {
1280 /*
1281 * there was already a page at the specified
1282 * offset for this object... remove it from
1283 * the object and free it back to the free list
1284 */
1285 vm_page_free_unlocked(found_m, FALSE);
1286 }
1287 vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1288 }
1289
1290 /*
1291 * vm_page_remove: [ internal use only ]
1292 *
1293 * Removes the given mem entry from the object/offset-page
1294 * table and the object page list.
1295 *
1296 * The object must be locked.
1297 */
1298
1299 void
1300 vm_page_remove(
1301 vm_page_t mem,
1302 boolean_t remove_from_hash)
1303 {
1304 vm_page_bucket_t *bucket;
1305 vm_page_t this;
1306 lck_spin_t *bucket_lock;
1307 int hash_id;
1308
1309 XPR(XPR_VM_PAGE,
1310 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1311 mem->object, mem->offset,
1312 mem, 0,0);
1313
1314 vm_object_lock_assert_exclusive(mem->object);
1315 assert(mem->tabled);
1316 assert(!mem->cleaning);
1317 assert(!mem->laundry);
1318 #if 0
1319 /*
1320 * we don't hold the page queue lock
1321 * so this check isn't safe to make
1322 */
1323 VM_PAGE_CHECK(mem);
1324 #endif
1325 if (remove_from_hash == TRUE) {
1326 /*
1327 * Remove from the object/offset hash table
1328 */
1329 hash_id = vm_page_hash(mem->object, mem->offset);
1330 bucket = &vm_page_buckets[hash_id];
1331 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1332
1333 lck_spin_lock(bucket_lock);
1334
1335 if ((this = bucket->pages) == mem) {
1336 /* optimize for common case */
1337
1338 bucket->pages = mem->next;
1339 } else {
1340 vm_page_t *prev;
1341
1342 for (prev = &this->next;
1343 (this = *prev) != mem;
1344 prev = &this->next)
1345 continue;
1346 *prev = this->next;
1347 }
1348 #if MACH_PAGE_HASH_STATS
1349 bucket->cur_count--;
1350 #endif /* MACH_PAGE_HASH_STATS */
1351 mem->hashed = FALSE;
1352 lck_spin_unlock(bucket_lock);
1353 }
1354 /*
1355 * Now remove from the object's list of backed pages.
1356 */
1357
1358 VM_PAGE_REMOVE(mem);
1359
1360 /*
1361 * And show that the object has one fewer resident
1362 * page.
1363 */
1364
1365 assert(mem->object->resident_page_count > 0);
1366 mem->object->resident_page_count--;
1367
1368 if (mem->object->internal) {
1369 assert(vm_page_internal_count);
1370 OSAddAtomic(-1, &vm_page_internal_count);
1371 } else {
1372 assert(vm_page_external_count);
1373 OSAddAtomic(-1, &vm_page_external_count);
1374 }
1375 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1376 if (mem->object->resident_page_count == 0)
1377 vm_object_cache_remove(mem->object);
1378 }
1379
1380 if (VM_PAGE_WIRED(mem)) {
1381 assert(mem->object->wired_page_count > 0);
1382 mem->object->wired_page_count--;
1383 }
1384 assert(mem->object->resident_page_count >=
1385 mem->object->wired_page_count);
1386 if (mem->reusable) {
1387 assert(mem->object->reusable_page_count > 0);
1388 mem->object->reusable_page_count--;
1389 assert(mem->object->reusable_page_count <=
1390 mem->object->resident_page_count);
1391 mem->reusable = FALSE;
1392 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1393 vm_page_stats_reusable.reused_remove++;
1394 } else if (mem->object->all_reusable) {
1395 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1396 vm_page_stats_reusable.reused_remove++;
1397 }
1398
1399 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1400 if (VM_PAGE_WIRED(mem)) {
1401 assert(vm_page_purgeable_wired_count > 0);
1402 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1403 } else {
1404 assert(vm_page_purgeable_count > 0);
1405 OSAddAtomic(-1, &vm_page_purgeable_count);
1406 }
1407 }
1408 if (mem->object->set_cache_attr == TRUE)
1409 pmap_set_cache_attributes(mem->phys_page, 0);
1410
1411 mem->tabled = FALSE;
1412 mem->object = VM_OBJECT_NULL;
1413 mem->offset = (vm_object_offset_t) -1;
1414 }
1415
1416
1417 /*
1418 * vm_page_lookup:
1419 *
1420 * Returns the page associated with the object/offset
1421 * pair specified; if none is found, VM_PAGE_NULL is returned.
1422 *
1423 * The object must be locked. No side effects.
1424 */
1425
1426 unsigned long vm_page_lookup_hint = 0;
1427 unsigned long vm_page_lookup_hint_next = 0;
1428 unsigned long vm_page_lookup_hint_prev = 0;
1429 unsigned long vm_page_lookup_hint_miss = 0;
1430 unsigned long vm_page_lookup_bucket_NULL = 0;
1431 unsigned long vm_page_lookup_miss = 0;
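/*
 * The counters above are statistics only: vm_page_lookup_hint{,_next,_prev}
 * count lookups satisfied from the object's memq_hint (the last page
 * returned, or one of its list neighbors) without touching the hash table,
 * vm_page_lookup_hint_miss counts hashed lookups made with a stale hint,
 * and the bucket_NULL / miss counters track lookups that found no page.
 */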
1432
1433
1434 vm_page_t
1435 vm_page_lookup(
1436 vm_object_t object,
1437 vm_object_offset_t offset)
1438 {
1439 vm_page_t mem;
1440 vm_page_bucket_t *bucket;
1441 queue_entry_t qe;
1442 lck_spin_t *bucket_lock;
1443 int hash_id;
1444
1445 vm_object_lock_assert_held(object);
1446 mem = object->memq_hint;
1447
1448 if (mem != VM_PAGE_NULL) {
1449 assert(mem->object == object);
1450
1451 if (mem->offset == offset) {
1452 vm_page_lookup_hint++;
1453 return mem;
1454 }
1455 qe = queue_next(&mem->listq);
1456
1457 if (! queue_end(&object->memq, qe)) {
1458 vm_page_t next_page;
1459
1460 next_page = (vm_page_t) qe;
1461 assert(next_page->object == object);
1462
1463 if (next_page->offset == offset) {
1464 vm_page_lookup_hint_next++;
1465 object->memq_hint = next_page; /* new hint */
1466 return next_page;
1467 }
1468 }
1469 qe = queue_prev(&mem->listq);
1470
1471 if (! queue_end(&object->memq, qe)) {
1472 vm_page_t prev_page;
1473
1474 prev_page = (vm_page_t) qe;
1475 assert(prev_page->object == object);
1476
1477 if (prev_page->offset == offset) {
1478 vm_page_lookup_hint_prev++;
1479 object->memq_hint = prev_page; /* new hint */
1480 return prev_page;
1481 }
1482 }
1483 }
1484 /*
1485 * Search the hash table for this object/offset pair
1486 */
1487 hash_id = vm_page_hash(object, offset);
1488 bucket = &vm_page_buckets[hash_id];
1489
1490 /*
1491 * since we hold the object lock, we are guaranteed that no
1492 * new pages can be inserted into this object... this in turn
1493 * guarantees that the page we're looking for can't exist
1494 * if the bucket it hashes to is currently NULL even when looked
1495 * at outside the scope of the hash bucket lock... this is a
1496 * really cheap optimization to avoid taking the lock
1497 */
1498 if (bucket->pages == VM_PAGE_NULL) {
1499 vm_page_lookup_bucket_NULL++;
1500
1501 return (VM_PAGE_NULL);
1502 }
1503 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1504
1505 lck_spin_lock(bucket_lock);
1506
1507 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1508 #if 0
1509 /*
1510 * we don't hold the page queue lock
1511 * so this check isn't safe to make
1512 */
1513 VM_PAGE_CHECK(mem);
1514 #endif
1515 if ((mem->object == object) && (mem->offset == offset))
1516 break;
1517 }
1518 lck_spin_unlock(bucket_lock);
1519
1520 if (mem != VM_PAGE_NULL) {
1521 if (object->memq_hint != VM_PAGE_NULL) {
1522 vm_page_lookup_hint_miss++;
1523 }
1524 assert(mem->object == object);
1525 object->memq_hint = mem;
1526 } else
1527 vm_page_lookup_miss++;
1528
1529 return(mem);
1530 }
1531
1532
1533 /*
1534 * vm_page_rename:
1535 *
1536 * Move the given memory entry from its
1537 * current object to the specified target object/offset.
1538 *
1539 * The object must be locked.
1540 */
1541 void
1542 vm_page_rename(
1543 register vm_page_t mem,
1544 register vm_object_t new_object,
1545 vm_object_offset_t new_offset,
1546 boolean_t encrypted_ok)
1547 {
1548 boolean_t internal_to_external, external_to_internal;
1549
1550 assert(mem->object != new_object);
1551
1552 /*
1553 * ENCRYPTED SWAP:
1554 * The encryption key is based on the page's memory object
1555 * (aka "pager") and paging offset. Moving the page to
1556 * another VM object changes its "pager" and "paging_offset"
1557 * so it has to be decrypted first, or we would lose the key.
1558 *
1559 * One exception is VM object collapsing, where we transfer pages
1560 * from one backing object to its parent object. This operation also
1561 * transfers the paging information, so the <pager,paging_offset> info
1562 * should remain consistent. The caller (vm_object_do_collapse())
1563 * sets "encrypted_ok" in this case.
1564 */
1565 if (!encrypted_ok && mem->encrypted) {
1566 panic("vm_page_rename: page %p is encrypted\n", mem);
1567 }
1568
1569 XPR(XPR_VM_PAGE,
1570 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1571 new_object, new_offset,
1572 mem, 0,0);
1573
1574 /*
1575 * Changes to mem->object require the page lock because
1576 * the pageout daemon uses that lock to get the object.
1577 */
1578 vm_page_lockspin_queues();
1579
1580 internal_to_external = FALSE;
1581 external_to_internal = FALSE;
1582
1583 if (mem->local) {
1584 /*
1585 * it's much easier to get the vm_page_pageable_xxx accounting correct
1586 * if we first move the page to the active queue... it's going to end
1587 * up there anyway, and we don't call vm_page_rename() frequently enough
1588 * for this to matter.
1589 */
1590 VM_PAGE_QUEUES_REMOVE(mem);
1591 vm_page_activate(mem);
1592 }
1593 if (mem->active || mem->inactive || mem->speculative) {
1594 if (mem->object->internal && !new_object->internal) {
1595 internal_to_external = TRUE;
1596 }
1597 if (!mem->object->internal && new_object->internal) {
1598 external_to_internal = TRUE;
1599 }
1600 }
1601
1602 vm_page_remove(mem, TRUE);
1603 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1604
1605 if (internal_to_external) {
1606 vm_page_pageable_internal_count--;
1607 vm_page_pageable_external_count++;
1608 } else if (external_to_internal) {
1609 vm_page_pageable_external_count--;
1610 vm_page_pageable_internal_count++;
1611 }
1612
1613 vm_page_unlock_queues();
1614 }
1615
1616 /*
1617 * vm_page_init:
1618 *
1619 * Initialize the fields in a new page.
1620 * This takes a structure with random values and initializes it
1621 * so that it can be given to vm_page_release or vm_page_insert.
1622 */
1623 void
1624 vm_page_init(
1625 vm_page_t mem,
1626 ppnum_t phys_page,
1627 boolean_t lopage)
1628 {
1629 assert(phys_page);
1630
1631 #if DEBUG
1632 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1633 if (!(pmap_valid_page(phys_page))) {
1634 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1635 }
1636 }
1637 #endif
1638 *mem = vm_page_template;
1639 mem->phys_page = phys_page;
1640 #if 0
1641 /*
1642 * we're leaving this turned off for now... currently pages
1643 * come off the free list and are either immediately dirtied/referenced
1644 * due to zero-fill or COW faults, or are used to read or write files...
1645 * in the file I/O case, the UPL mechanism takes care of clearing
1646 * the state of the HW ref/mod bits in a somewhat fragile way.
1647 * Since we may change the way this works in the future (to toughen it up),
1648 * I'm leaving this as a reminder of where these bits could get cleared
1649 */
1650
1651 /*
1652 * make sure both the h/w referenced and modified bits are
1653 * clear at this point... we are especially dependent on
1654 * not finding a 'stale' h/w modified in a number of spots
1655 * once this page goes back into use
1656 */
1657 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1658 #endif
1659 mem->lopage = lopage;
1660 }
1661
1662 /*
1663 * vm_page_grab_fictitious:
1664 *
1665 * Remove a fictitious page from the free list.
1666 * Returns VM_PAGE_NULL if there are no free pages.
1667 */
1668 int c_vm_page_grab_fictitious = 0;
1669 int c_vm_page_grab_fictitious_failed = 0;
1670 int c_vm_page_release_fictitious = 0;
1671 int c_vm_page_more_fictitious = 0;
1672
1673 vm_page_t
1674 vm_page_grab_fictitious_common(
1675 ppnum_t phys_addr)
1676 {
1677 vm_page_t m;
1678
1679 if ((m = (vm_page_t)zget(vm_page_zone))) {
1680
1681 vm_page_init(m, phys_addr, FALSE);
1682 m->fictitious = TRUE;
1683
1684 c_vm_page_grab_fictitious++;
1685 } else
1686 c_vm_page_grab_fictitious_failed++;
1687
1688 return m;
1689 }
1690
1691 vm_page_t
1692 vm_page_grab_fictitious(void)
1693 {
1694 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1695 }
1696
1697 vm_page_t
1698 vm_page_grab_guard(void)
1699 {
1700 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1701 }
1702
1703
1704 /*
1705 * vm_page_release_fictitious:
1706 *
1707 * Release a fictitious page to the zone pool
1708 */
1709 void
1710 vm_page_release_fictitious(
1711 vm_page_t m)
1712 {
1713 assert(!m->free);
1714 assert(m->fictitious);
1715 assert(m->phys_page == vm_page_fictitious_addr ||
1716 m->phys_page == vm_page_guard_addr);
1717
1718 c_vm_page_release_fictitious++;
1719
1720 zfree(vm_page_zone, m);
1721 }
1722
1723 /*
1724 * vm_page_more_fictitious:
1725 *
1726 * Add more fictitious pages to the zone.
1727 * Allowed to block. This routine is way intimate
1728 * with the zones code, for several reasons:
1729 * 1. we need to carve some page structures out of physical
1730 * memory before zones work, so they _cannot_ come from
1731 * the zone_map.
1732 * 2. the zone needs to be collectable in order to prevent
1733 * growth without bound. These structures are used by
1734 * the device pager (by the hundreds and thousands), as
1735 * private pages for pageout, and as blocking pages for
1736 * pagein. Temporary bursts in demand should not result in
1737 * permanent allocation of a resource.
1738 * 3. To smooth allocation humps, we allocate single pages
1739 * with kernel_memory_allocate(), and cram them into the
1740 * zone.
1741 */
1742
1743 void vm_page_more_fictitious(void)
1744 {
1745 vm_offset_t addr;
1746 kern_return_t retval;
1747
1748 c_vm_page_more_fictitious++;
1749
1750 /*
1751 * Allocate a single page from the zone_map. Do not wait if no physical
1752 * pages are immediately available, and do not zero the space. We need
1753 * our own blocking lock here to prevent having multiple,
1754 * simultaneous requests from piling up on the zone_map lock. Exactly
1755 * one (of our) threads should be potentially waiting on the map lock.
1756 * If winner is not vm-privileged, then the page allocation will fail,
1757 * and it will temporarily block here in the vm_page_wait().
1758 */
1759 lck_mtx_lock(&vm_page_alloc_lock);
1760 /*
1761 * If another thread allocated space, just bail out now.
1762 */
1763 if (zone_free_count(vm_page_zone) > 5) {
1764 /*
1765 * The number "5" is a small number that is larger than the
1766 * number of fictitious pages that any single caller will
1767 * attempt to allocate. Otherwise, a thread will attempt to
1768 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1769 * release all of the resources and locks already acquired,
1770 * and then call this routine. This routine finds the pages
1771 * that the caller released, so it fails to allocate new space.
1772 * The process repeats infinitely. The largest known number
1773 * of fictitious pages required in this manner is 2. 5 is
1774 * simply a somewhat larger number.
1775 */
1776 lck_mtx_unlock(&vm_page_alloc_lock);
1777 return;
1778 }
1779
1780 retval = kernel_memory_allocate(zone_map,
1781 &addr, PAGE_SIZE, VM_PROT_ALL,
1782 KMA_KOBJECT|KMA_NOPAGEWAIT);
1783 if (retval != KERN_SUCCESS) {
1784 /*
1785 * No page was available. Drop the
1786 * lock to give another thread a chance at it, and
1787 * wait for the pageout daemon to make progress.
1788 */
1789 lck_mtx_unlock(&vm_page_alloc_lock);
1790 vm_page_wait(THREAD_UNINT);
1791 return;
1792 }
1793
1794 /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
1795 OSAddAtomic64(1, &(vm_page_zone->page_count));
1796
1797 zcram(vm_page_zone, addr, PAGE_SIZE);
1798
1799 lck_mtx_unlock(&vm_page_alloc_lock);
1800 }
1801
1802
1803 /*
1804 * vm_pool_low():
1805 *
1806 * Return true if it is not likely that a non-vm_privileged thread
1807 * can get memory without blocking. Advisory only, since the
1808 * situation may change under us.
1809 */
1810 int
1811 vm_pool_low(void)
1812 {
1813 /* No locking, at worst we will fib. */
1814 return( vm_page_free_count <= vm_page_free_reserved );
1815 }
1816
1817
1818
1819 /*
1820 * this is an interface to support bring-up of drivers
1821 * on platforms with physical memory > 4G...
1822 */
1823 int vm_himemory_mode = 0;
1824
1825
1826 /*
1827 * this interface exists to support hardware controllers
1828 * incapable of generating DMAs with more than 32 bits
1829 * of address on platforms with physical memory > 4G...
1830 */
1831 unsigned int vm_lopages_allocated_q = 0;
1832 unsigned int vm_lopages_allocated_cpm_success = 0;
1833 unsigned int vm_lopages_allocated_cpm_failed = 0;
1834 queue_head_t vm_lopage_queue_free;
1835
1836 vm_page_t
1837 vm_page_grablo(void)
1838 {
1839 vm_page_t mem;
1840
1841 if (vm_lopage_needed == FALSE)
1842 return (vm_page_grab());
1843
1844 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1845
1846 if ( !queue_empty(&vm_lopage_queue_free)) {
1847 queue_remove_first(&vm_lopage_queue_free,
1848 mem,
1849 vm_page_t,
1850 pageq);
1851 assert(vm_lopage_free_count);
1852
1853 vm_lopage_free_count--;
1854 vm_lopages_allocated_q++;
1855
1856 if (vm_lopage_free_count < vm_lopage_lowater)
1857 vm_lopage_refill = TRUE;
1858
1859 lck_mtx_unlock(&vm_page_queue_free_lock);
1860 } else {
1861 lck_mtx_unlock(&vm_page_queue_free_lock);
1862
1863 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1864
1865 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1866 vm_lopages_allocated_cpm_failed++;
1867 lck_mtx_unlock(&vm_page_queue_free_lock);
1868
1869 return (VM_PAGE_NULL);
1870 }
1871 mem->busy = TRUE;
1872
1873 vm_page_lockspin_queues();
1874
1875 mem->gobbled = FALSE;
1876 vm_page_gobble_count--;
1877 vm_page_wire_count--;
1878
1879 vm_lopages_allocated_cpm_success++;
1880 vm_page_unlock_queues();
1881 }
1882 assert(mem->busy);
1883 assert(!mem->free);
1884 assert(!mem->pmapped);
1885 assert(!mem->wpmapped);
1886 assert(!pmap_is_noencrypt(mem->phys_page));
1887
1888 mem->pageq.next = NULL;
1889 mem->pageq.prev = NULL;
1890
1891 return (mem);
1892 }
1893
1894
1895 /*
1896 * vm_page_grab:
1897 *
1898 * first try to grab a page from the per-cpu free list...
1899 * this must be done while pre-emption is disabled... if
1900 * a page is available, we're done...
1901 * if no page is available, grab the vm_page_queue_free_lock
1902 * and see if current number of free pages would allow us
1903 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1904 * if there are pages available, disable preemption and
1905 * recheck the state of the per-cpu free list... we could
1906 * have been preempted and moved to a different cpu, or
1907 * some other thread could have re-filled it... if still
1908 * empty, figure out how many pages we can steal from the
1909 * global free queue and move to the per-cpu queue...
1910 * return one of these pages when done... only wake up the
1911 * pageout_scan thread if we moved pages from the global
1912 * list... no need for the wakeup if we've satisfied the
1913 * request from the per-cpu queue.
1914 */
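
/*
 * Illustrative sketch (not part of the original source): a typical
 * non-privileged caller simply retries through vm_page_wait() until a
 * page shows up, mirroring the loop used by vm_page_part_zero_fill()
 * later in this file:
 *
 *	vm_page_t m;
 *
 *	while ((m = vm_page_grab()) == VM_PAGE_NULL)
 *		vm_page_wait(THREAD_UNINT);
 */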
1915
1916 #define COLOR_GROUPS_TO_STEAL 4
1917
1918
1919 vm_page_t
1920 vm_page_grab( void )
1921 {
1922 vm_page_t mem;
1923
1924
1925 disable_preemption();
1926
1927 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1928 return_page_from_cpu_list:
1929 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1930 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1931 mem->pageq.next = NULL;
1932
1933 enable_preemption();
1934
1935 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1936 assert(mem->tabled == FALSE);
1937 assert(mem->object == VM_OBJECT_NULL);
1938 assert(!mem->laundry);
1939 assert(!mem->free);
1940 assert(pmap_verify_free(mem->phys_page));
1941 assert(mem->busy);
1942 assert(!mem->encrypted);
1943 assert(!mem->pmapped);
1944 assert(!mem->wpmapped);
1945 assert(!mem->active);
1946 assert(!mem->inactive);
1947 assert(!mem->throttled);
1948 assert(!mem->speculative);
1949 assert(!pmap_is_noencrypt(mem->phys_page));
1950
1951 return mem;
1952 }
1953 enable_preemption();
1954
1955
1956 /*
1957 * Optionally produce warnings if the wire or gobble
1958 * counts exceed some threshold.
1959 */
1960 if (vm_page_wire_count_warning > 0
1961 && vm_page_wire_count >= vm_page_wire_count_warning) {
1962 printf("mk: vm_page_grab(): high wired page count of %d\n",
1963 vm_page_wire_count);
1964 assert(vm_page_wire_count < vm_page_wire_count_warning);
1965 }
1966 if (vm_page_gobble_count_warning > 0
1967 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1968 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1969 vm_page_gobble_count);
1970 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1971 }
1972
1973 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1974
1975 /*
1976 * Only let privileged threads (involved in pageout)
1977 * dip into the reserved pool.
1978 */
1979 if ((vm_page_free_count < vm_page_free_reserved) &&
1980 !(current_thread()->options & TH_OPT_VMPRIV)) {
1981 lck_mtx_unlock(&vm_page_queue_free_lock);
1982 mem = VM_PAGE_NULL;
1983 }
1984 else {
1985 vm_page_t head;
1986 vm_page_t tail;
1987 unsigned int pages_to_steal;
1988 unsigned int color;
1989
1990 while ( vm_page_free_count == 0 ) {
1991
1992 lck_mtx_unlock(&vm_page_queue_free_lock);
1993 /*
1994 * must be a privileged thread to be
1995 * in this state since a non-privileged
1996 * thread would have bailed if we were
1997 * under the vm_page_free_reserved mark
1998 */
1999 VM_PAGE_WAIT();
2000 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2001 }
2002
2003 disable_preemption();
2004
2005 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2006 lck_mtx_unlock(&vm_page_queue_free_lock);
2007
2008 /*
2009 * we got preempted and moved to another processor
2010 * or we got preempted and someone else ran and filled the cache
2011 */
2012 goto return_page_from_cpu_list;
2013 }
2014 if (vm_page_free_count <= vm_page_free_reserved)
2015 pages_to_steal = 1;
2016 else {
2017 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
2018
2019 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
2020 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2021 }
2022 color = PROCESSOR_DATA(current_processor(), start_color);
2023 head = tail = NULL;
2024
2025 while (pages_to_steal--) {
2026 if (--vm_page_free_count < vm_page_free_count_minimum)
2027 vm_page_free_count_minimum = vm_page_free_count;
2028
2029 while (queue_empty(&vm_page_queue_free[color]))
2030 color = (color + 1) & vm_color_mask;
2031
2032 queue_remove_first(&vm_page_queue_free[color],
2033 mem,
2034 vm_page_t,
2035 pageq);
2036 mem->pageq.next = NULL;
2037 mem->pageq.prev = NULL;
2038
2039 assert(!mem->active);
2040 assert(!mem->inactive);
2041 assert(!mem->throttled);
2042 assert(!mem->speculative);
2043
2044 color = (color + 1) & vm_color_mask;
2045
2046 if (head == NULL)
2047 head = mem;
2048 else
2049 tail->pageq.next = (queue_t)mem;
2050 tail = mem;
2051
2052 mem->pageq.prev = NULL;
2053 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2054 assert(mem->tabled == FALSE);
2055 assert(mem->object == VM_OBJECT_NULL);
2056 assert(!mem->laundry);
2057 assert(mem->free);
2058 mem->free = FALSE;
2059
2060 assert(pmap_verify_free(mem->phys_page));
2061 assert(mem->busy);
2062 assert(!mem->free);
2063 assert(!mem->encrypted);
2064 assert(!mem->pmapped);
2065 assert(!mem->wpmapped);
2066 assert(!pmap_is_noencrypt(mem->phys_page));
2067 }
2068 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2069 PROCESSOR_DATA(current_processor(), start_color) = color;
2070
2071 /*
2072 * satisfy this request
2073 */
2074 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2075 mem = head;
2076 mem->pageq.next = NULL;
2077
2078 lck_mtx_unlock(&vm_page_queue_free_lock);
2079
2080 enable_preemption();
2081 }
2082 /*
2083 * Decide if we should poke the pageout daemon.
2084 * We do this if the free count is less than the low
2085 * water mark, or if the free count is less than the high
2086 * water mark (but above the low water mark) and the inactive
2087 * count is less than its target.
2088 *
2089 * We don't have the counts locked ... if they change a little,
2090 * it doesn't really matter.
2091 */
2092 if ((vm_page_free_count < vm_page_free_min) ||
2093 ((vm_page_free_count < vm_page_free_target) &&
2094 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2095 thread_wakeup((event_t) &vm_page_free_wanted);
2096
2097 VM_CHECK_MEMORYSTATUS;
2098
2099 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2100
2101 return mem;
2102 }
2103
2104 /*
2105 * vm_page_release:
2106 *
2107 * Return a page to the free list.
2108 */
2109
2110 void
2111 vm_page_release(
2112 register vm_page_t mem)
2113 {
2114 unsigned int color;
2115 int need_wakeup = 0;
2116 int need_priv_wakeup = 0;
2117
2118
2119 assert(!mem->private && !mem->fictitious);
2120 if (vm_page_free_verify) {
2121 assert(pmap_verify_free(mem->phys_page));
2122 }
2123 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2124
2125 pmap_clear_noencrypt(mem->phys_page);
2126
2127 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2128 #if DEBUG
2129 if (mem->free)
2130 panic("vm_page_release");
2131 #endif
2132
2133 assert(mem->busy);
2134 assert(!mem->laundry);
2135 assert(mem->object == VM_OBJECT_NULL);
2136 assert(mem->pageq.next == NULL &&
2137 mem->pageq.prev == NULL);
2138 assert(mem->listq.next == NULL &&
2139 mem->listq.prev == NULL);
2140
2141 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2142 vm_lopage_free_count < vm_lopage_free_limit &&
2143 mem->phys_page < max_valid_low_ppnum) {
2144 /*
2145 * this exists to support hardware controllers
2146 * incapable of generating DMAs with more than 32 bits
2147 * of address on platforms with physical memory > 4G...
2148 */
2149 queue_enter_first(&vm_lopage_queue_free,
2150 mem,
2151 vm_page_t,
2152 pageq);
2153 vm_lopage_free_count++;
2154
2155 if (vm_lopage_free_count >= vm_lopage_free_limit)
2156 vm_lopage_refill = FALSE;
2157
2158 mem->lopage = TRUE;
2159 } else {
2160 mem->lopage = FALSE;
2161 mem->free = TRUE;
2162
2163 color = mem->phys_page & vm_color_mask;
2164 queue_enter_first(&vm_page_queue_free[color],
2165 mem,
2166 vm_page_t,
2167 pageq);
2168 vm_page_free_count++;
2169 /*
2170 * Check if we should wake up someone waiting for page.
2171 * But don't bother waking them unless they can allocate.
2172 *
2173 * We wake up only one thread, to prevent starvation.
2174 * Because the scheduling system handles wait queues FIFO,
2175 * if we wake up all waiting threads, one greedy thread
2176 * can starve multiple niceguy threads. When the threads
2177 * all wake up, the greedy thread runs first, grabs the page,
2178 * and waits for another page. It will be the first to run
2179 * when the next page is freed.
2180 *
2181 * However, there is a slight danger here.
2182 * The thread we wake might not use the free page.
2183 * Then the other threads could wait indefinitely
2184 * while the page goes unused. To forestall this,
2185 * the pageout daemon will keep making free pages
2186 * as long as vm_page_free_wanted is non-zero.
2187 */
2188
2189 assert(vm_page_free_count > 0);
2190 if (vm_page_free_wanted_privileged > 0) {
2191 vm_page_free_wanted_privileged--;
2192 need_priv_wakeup = 1;
2193 } else if (vm_page_free_wanted > 0 &&
2194 vm_page_free_count > vm_page_free_reserved) {
2195 vm_page_free_wanted--;
2196 need_wakeup = 1;
2197 }
2198 }
2199 lck_mtx_unlock(&vm_page_queue_free_lock);
2200
2201 if (need_priv_wakeup)
2202 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2203 else if (need_wakeup)
2204 thread_wakeup_one((event_t) &vm_page_free_count);
2205
2206 VM_CHECK_MEMORYSTATUS;
2207 }
2208
2209 /*
2210 * vm_page_wait:
2211 *
2212 * Wait for a page to become available.
2213 * If there are plenty of free pages, then we don't sleep.
2214 *
2215 * Returns:
2216 * TRUE: There may be another page, try again
2217 * FALSE: We were interrupted out of our wait, don't try again
2218 */
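
/*
 * Illustrative sketch (an assumed caller, not code from this file): an
 * interruptible waiter must be prepared for a FALSE return, e.g.
 *
 *	if (!vm_page_wait(THREAD_ABORTSAFE))
 *		return (KERN_ABORTED);
 *
 * whereas THREAD_UNINT callers can simply retry their allocation.
 */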
2219
2220 boolean_t
2221 vm_page_wait(
2222 int interruptible )
2223 {
2224 /*
2225 * We can't use vm_page_free_reserved to make this
2226 * determination. Consider: some thread might
2227 * need to allocate two pages. The first allocation
2228 * succeeds, the second fails. After the first page is freed,
2229 * a call to vm_page_wait must really block.
2230 */
2231 kern_return_t wait_result;
2232 int need_wakeup = 0;
2233 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2234
2235 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2236
2237 if (is_privileged && vm_page_free_count) {
2238 lck_mtx_unlock(&vm_page_queue_free_lock);
2239 return TRUE;
2240 }
2241 if (vm_page_free_count < vm_page_free_target) {
2242
2243 if (is_privileged) {
2244 if (vm_page_free_wanted_privileged++ == 0)
2245 need_wakeup = 1;
2246 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2247 } else {
2248 if (vm_page_free_wanted++ == 0)
2249 need_wakeup = 1;
2250 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2251 }
2252 lck_mtx_unlock(&vm_page_queue_free_lock);
2253 counter(c_vm_page_wait_block++);
2254
2255 if (need_wakeup)
2256 thread_wakeup((event_t)&vm_page_free_wanted);
2257
2258 if (wait_result == THREAD_WAITING) {
2259 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2260 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
2261 wait_result = thread_block(THREAD_CONTINUE_NULL);
2262 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2263 }
2264
2265 return(wait_result == THREAD_AWAKENED);
2266 } else {
2267 lck_mtx_unlock(&vm_page_queue_free_lock);
2268 return TRUE;
2269 }
2270 }
2271
2272 /*
2273 * vm_page_alloc:
2274 *
2275 * Allocate and return a memory cell associated
2276 * with this VM object/offset pair.
2277 *
2278 * Object must be locked.
2279 */
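
/*
 * Illustrative sketch (an assumed caller pattern, not code from this
 * file): the object must be locked exclusively across the call, and the
 * page comes back busy, as all pages from vm_page_grab() do:
 *
 *	vm_object_lock(object);
 *	m = vm_page_alloc(object, offset);
 *	if (m != VM_PAGE_NULL) {
 *		... fill or zero the page ...
 *		PAGE_WAKEUP_DONE(m);
 *	}
 *	vm_object_unlock(object);
 */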
2280
2281 vm_page_t
2282 vm_page_alloc(
2283 vm_object_t object,
2284 vm_object_offset_t offset)
2285 {
2286 register vm_page_t mem;
2287
2288 vm_object_lock_assert_exclusive(object);
2289 mem = vm_page_grab();
2290 if (mem == VM_PAGE_NULL)
2291 return VM_PAGE_NULL;
2292
2293 vm_page_insert(mem, object, offset);
2294
2295 return(mem);
2296 }
2297
2298 vm_page_t
2299 vm_page_alloclo(
2300 vm_object_t object,
2301 vm_object_offset_t offset)
2302 {
2303 register vm_page_t mem;
2304
2305 vm_object_lock_assert_exclusive(object);
2306 mem = vm_page_grablo();
2307 if (mem == VM_PAGE_NULL)
2308 return VM_PAGE_NULL;
2309
2310 vm_page_insert(mem, object, offset);
2311
2312 return(mem);
2313 }
2314
2315
2316 /*
2317 * vm_page_alloc_guard:
2318 *
2319 * Allocate a fictitious page which will be used
2320 * as a guard page. The page will be inserted into
2321 * the object and returned to the caller.
2322 */
2323
2324 vm_page_t
2325 vm_page_alloc_guard(
2326 vm_object_t object,
2327 vm_object_offset_t offset)
2328 {
2329 register vm_page_t mem;
2330
2331 vm_object_lock_assert_exclusive(object);
2332 mem = vm_page_grab_guard();
2333 if (mem == VM_PAGE_NULL)
2334 return VM_PAGE_NULL;
2335
2336 vm_page_insert(mem, object, offset);
2337
2338 return(mem);
2339 }
2340
2341
2342 counter(unsigned int c_laundry_pages_freed = 0;)
2343
2344 /*
2345 * vm_page_free_prepare:
2346 *
2347 * Removes page from any queue it may be on
2348 * and disassociates it from its VM object.
2349 *
2350 * Object and page queues must be locked prior to entry.
2351 */
2352 static void
2353 vm_page_free_prepare(
2354 vm_page_t mem)
2355 {
2356 vm_page_free_prepare_queues(mem);
2357 vm_page_free_prepare_object(mem, TRUE);
2358 }
2359
2360
2361 void
2362 vm_page_free_prepare_queues(
2363 vm_page_t mem)
2364 {
2365 VM_PAGE_CHECK(mem);
2366 assert(!mem->free);
2367 assert(!mem->cleaning);
2368 #if DEBUG
2369 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2370 if (mem->free)
2371 panic("vm_page_free: freeing page on free list\n");
2372 #endif
2373 if (mem->object) {
2374 vm_object_lock_assert_exclusive(mem->object);
2375 }
2376 if (mem->laundry) {
2377 /*
2378 * We may have to free a page while it's being laundered
2379 * if we lost its pager (due to a forced unmount, for example).
2380 * We need to call vm_pageout_steal_laundry() before removing
2381 * the page from its VM object, so that we can remove it
2382 * from its pageout queue and adjust the laundry accounting
2383 */
2384 vm_pageout_steal_laundry(mem, TRUE);
2385 counter(++c_laundry_pages_freed);
2386 }
2387
2388 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2389
2390 if (VM_PAGE_WIRED(mem)) {
2391 if (mem->object) {
2392 assert(mem->object->wired_page_count > 0);
2393 mem->object->wired_page_count--;
2394 assert(mem->object->resident_page_count >=
2395 mem->object->wired_page_count);
2396
2397 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2398 OSAddAtomic(+1, &vm_page_purgeable_count);
2399 assert(vm_page_purgeable_wired_count > 0);
2400 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2401 }
2402 }
2403 if (!mem->private && !mem->fictitious)
2404 vm_page_wire_count--;
2405 mem->wire_count = 0;
2406 assert(!mem->gobbled);
2407 } else if (mem->gobbled) {
2408 if (!mem->private && !mem->fictitious)
2409 vm_page_wire_count--;
2410 vm_page_gobble_count--;
2411 }
2412 }
2413
2414
2415 void
2416 vm_page_free_prepare_object(
2417 vm_page_t mem,
2418 boolean_t remove_from_hash)
2419 {
2420 if (mem->tabled)
2421 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2422
2423 PAGE_WAKEUP(mem); /* clears wanted */
2424
2425 if (mem->private) {
2426 mem->private = FALSE;
2427 mem->fictitious = TRUE;
2428 mem->phys_page = vm_page_fictitious_addr;
2429 }
2430 if ( !mem->fictitious) {
2431 vm_page_init(mem, mem->phys_page, mem->lopage);
2432 }
2433 }
2434
2435
2436 /*
2437 * vm_page_free:
2438 *
2439 * Returns the given page to the free list,
2440 * disassociating it with any VM object.
2441 *
2442 * Object and page queues must be locked prior to entry.
2443 */
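
/*
 * Illustrative sketch (an assumed caller pattern, not code from this
 * file): both the object lock and the page-queues lock are held around
 * the call:
 *
 *	vm_object_lock(object);
 *	vm_page_lock_queues();
 *	vm_page_free(mem);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 *
 * Callers that do not already hold the queues lock can use
 * vm_page_free_unlocked() below, which takes it internally.
 */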
2444 void
2445 vm_page_free(
2446 vm_page_t mem)
2447 {
2448 vm_page_free_prepare(mem);
2449
2450 if (mem->fictitious) {
2451 vm_page_release_fictitious(mem);
2452 } else {
2453 vm_page_release(mem);
2454 }
2455 }
2456
2457
2458 void
2459 vm_page_free_unlocked(
2460 vm_page_t mem,
2461 boolean_t remove_from_hash)
2462 {
2463 vm_page_lockspin_queues();
2464 vm_page_free_prepare_queues(mem);
2465 vm_page_unlock_queues();
2466
2467 vm_page_free_prepare_object(mem, remove_from_hash);
2468
2469 if (mem->fictitious) {
2470 vm_page_release_fictitious(mem);
2471 } else {
2472 vm_page_release(mem);
2473 }
2474 }
2475
2476
2477 /*
2478 * Free a list of pages. The list can be up to several hundred pages,
2479 * as blocked up by vm_pageout_scan().
2480 * The big win is not having to take the free list lock once
2481 * per page.
2482 */
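
/*
 * Illustrative sketch (an assumed caller pattern, not code from this
 * file): pages are chained through pageq.next, with pageq.prev left
 * NULL, and the whole chain is handed over in a single call:
 *
 *	vm_page_t local_freeq = VM_PAGE_NULL;
 *
 *	... for each page being freed ...
 *		mem->pageq.next = (queue_entry_t) local_freeq;
 *		local_freeq = mem;
 *	...
 *	vm_page_free_list(local_freeq, TRUE);
 */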
2483 void
2484 vm_page_free_list(
2485 vm_page_t freeq,
2486 boolean_t prepare_object)
2487 {
2488 vm_page_t mem;
2489 vm_page_t nxt;
2490 vm_page_t local_freeq;
2491 int pg_count;
2492
2493 while (freeq) {
2494
2495 pg_count = 0;
2496 local_freeq = VM_PAGE_NULL;
2497 mem = freeq;
2498
2499 /*
2500 * break up the processing into smaller chunks so
2501 * that we can 'pipeline' the pages onto the
2502 * free list w/o introducing too much
2503 * contention on the global free queue lock
2504 */
2505 while (mem && pg_count < 64) {
2506
2507 assert(!mem->inactive);
2508 assert(!mem->active);
2509 assert(!mem->throttled);
2510 assert(!mem->free);
2511 assert(!mem->speculative);
2512 assert(!VM_PAGE_WIRED(mem));
2513 assert(mem->pageq.prev == NULL);
2514
2515 nxt = (vm_page_t)(mem->pageq.next);
2516
2517 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2518 assert(pmap_verify_free(mem->phys_page));
2519 }
2520 if (prepare_object == TRUE)
2521 vm_page_free_prepare_object(mem, TRUE);
2522
2523 if (!mem->fictitious) {
2524 assert(mem->busy);
2525
2526 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2527 vm_lopage_free_count < vm_lopage_free_limit &&
2528 mem->phys_page < max_valid_low_ppnum) {
2529 mem->pageq.next = NULL;
2530 vm_page_release(mem);
2531 } else {
2532 /*
2533 * IMPORTANT: we can't set the page "free" here
2534 * because that would make the page eligible for
2535 * a physically-contiguous allocation (see
2536 * vm_page_find_contiguous()) right away (we don't
2537 * hold the vm_page_queue_free lock). That would
2538 * cause trouble because the page is not actually
2539 * in the free queue yet...
2540 */
2541 mem->pageq.next = (queue_entry_t)local_freeq;
2542 local_freeq = mem;
2543 pg_count++;
2544
2545 pmap_clear_noencrypt(mem->phys_page);
2546 }
2547 } else {
2548 assert(mem->phys_page == vm_page_fictitious_addr ||
2549 mem->phys_page == vm_page_guard_addr);
2550 vm_page_release_fictitious(mem);
2551 }
2552 mem = nxt;
2553 }
2554 freeq = mem;
2555
2556 if ( (mem = local_freeq) ) {
2557 unsigned int avail_free_count;
2558 unsigned int need_wakeup = 0;
2559 unsigned int need_priv_wakeup = 0;
2560
2561 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2562
2563 while (mem) {
2564 int color;
2565
2566 nxt = (vm_page_t)(mem->pageq.next);
2567
2568 assert(!mem->free);
2569 assert(mem->busy);
2570 mem->free = TRUE;
2571
2572 color = mem->phys_page & vm_color_mask;
2573 queue_enter_first(&vm_page_queue_free[color],
2574 mem,
2575 vm_page_t,
2576 pageq);
2577 mem = nxt;
2578 }
2579 vm_page_free_count += pg_count;
2580 avail_free_count = vm_page_free_count;
2581
2582 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2583
2584 if (avail_free_count < vm_page_free_wanted_privileged) {
2585 need_priv_wakeup = avail_free_count;
2586 vm_page_free_wanted_privileged -= avail_free_count;
2587 avail_free_count = 0;
2588 } else {
2589 need_priv_wakeup = vm_page_free_wanted_privileged;
2590 avail_free_count -= vm_page_free_wanted_privileged;
2591 vm_page_free_wanted_privileged = 0;
2592 }
2593 }
2594 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2595 unsigned int available_pages;
2596
2597 available_pages = avail_free_count - vm_page_free_reserved;
2598
2599 if (available_pages >= vm_page_free_wanted) {
2600 need_wakeup = vm_page_free_wanted;
2601 vm_page_free_wanted = 0;
2602 } else {
2603 need_wakeup = available_pages;
2604 vm_page_free_wanted -= available_pages;
2605 }
2606 }
2607 lck_mtx_unlock(&vm_page_queue_free_lock);
2608
2609 if (need_priv_wakeup != 0) {
2610 /*
2611 * There shouldn't be that many VM-privileged threads,
2612 * so let's wake them all up, even if we don't quite
2613 * have enough pages to satisfy them all.
2614 */
2615 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2616 }
2617 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2618 /*
2619 * We don't expect to have any more waiters
2620 * after this, so let's wake them all up at
2621 * once.
2622 */
2623 thread_wakeup((event_t) &vm_page_free_count);
2624 } else for (; need_wakeup != 0; need_wakeup--) {
2625 /*
2626 * Wake up one waiter per page we just released.
2627 */
2628 thread_wakeup_one((event_t) &vm_page_free_count);
2629 }
2630
2631 VM_CHECK_MEMORYSTATUS;
2632 }
2633 }
2634 }
2635
2636
2637 /*
2638 * vm_page_wire:
2639 *
2640 * Mark this page as wired down by yet
2641 * another map, removing it from paging queues
2642 * as necessary.
2643 *
2644 * The page's object and the page queues must be locked.
2645 */
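
/*
 * Illustrative sketch (an assumed caller pattern, not code from this
 * file): wiring, and the vm_page_unwire() that balances it, are done
 * under the object lock and the page-queues lock:
 *
 *	vm_object_lock(object);
 *	vm_page_lockspin_queues();
 *	vm_page_wire(mem);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */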
2646 void
2647 vm_page_wire(
2648 register vm_page_t mem)
2649 {
2650
2651 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2652
2653 VM_PAGE_CHECK(mem);
2654 if (mem->object) {
2655 vm_object_lock_assert_exclusive(mem->object);
2656 } else {
2657 /*
2658 * In theory, the page should be in an object before it
2659 * gets wired, since we need to hold the object lock
2660 * to update some fields in the page structure.
2661 * However, some code (i386 pmap, for example) might want
2662 * to wire a page before it gets inserted into an object.
2663 * That's somewhat OK, as long as nobody else can get to
2664 * that page and update it at the same time.
2665 */
2666 }
2667 #if DEBUG
2668 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2669 #endif
2670 if ( !VM_PAGE_WIRED(mem)) {
2671
2672 if (mem->pageout_queue) {
2673 mem->pageout = FALSE;
2674 vm_pageout_throttle_up(mem);
2675 }
2676 VM_PAGE_QUEUES_REMOVE(mem);
2677
2678 if (mem->object) {
2679 mem->object->wired_page_count++;
2680 assert(mem->object->resident_page_count >=
2681 mem->object->wired_page_count);
2682 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2683 assert(vm_page_purgeable_count > 0);
2684 OSAddAtomic(-1, &vm_page_purgeable_count);
2685 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2686 }
2687 if (mem->object->all_reusable) {
2688 /*
2689 * Wired pages are not counted as "re-usable"
2690 * in "all_reusable" VM objects, so nothing
2691 * to do here.
2692 */
2693 } else if (mem->reusable) {
2694 /*
2695 * This page is not "re-usable" when it's
2696 * wired, so adjust its state and the
2697 * accounting.
2698 */
2699 vm_object_reuse_pages(mem->object,
2700 mem->offset,
2701 mem->offset+PAGE_SIZE_64,
2702 FALSE);
2703 }
2704 }
2705 assert(!mem->reusable);
2706
2707 if (!mem->private && !mem->fictitious && !mem->gobbled)
2708 vm_page_wire_count++;
2709 if (mem->gobbled)
2710 vm_page_gobble_count--;
2711 mem->gobbled = FALSE;
2712
2713 VM_CHECK_MEMORYSTATUS;
2714
2715 /*
2716 * ENCRYPTED SWAP:
2717 * The page could be encrypted, but
2718 * We don't have to decrypt it here
2719 * because we don't guarantee that the
2720 * data is actually valid at this point.
2721 * The page will get decrypted in
2722 * vm_fault_wire() if needed.
2723 */
2724 }
2725 assert(!mem->gobbled);
2726 mem->wire_count++;
2727 VM_PAGE_CHECK(mem);
2728 }
2729
2730 /*
2731 * vm_page_gobble:
2732 *
2733 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2734 *
2735 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2736 */
2737 void
2738 vm_page_gobble(
2739 register vm_page_t mem)
2740 {
2741 vm_page_lockspin_queues();
2742 VM_PAGE_CHECK(mem);
2743
2744 assert(!mem->gobbled);
2745 assert( !VM_PAGE_WIRED(mem));
2746
2747 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2748 if (!mem->private && !mem->fictitious)
2749 vm_page_wire_count++;
2750 }
2751 vm_page_gobble_count++;
2752 mem->gobbled = TRUE;
2753 vm_page_unlock_queues();
2754 }
2755
2756 /*
2757 * vm_page_unwire:
2758 *
2759 * Release one wiring of this page, potentially
2760 * enabling it to be paged again.
2761 *
2762 * The page's object and the page queues must be locked.
2763 */
2764 void
2765 vm_page_unwire(
2766 vm_page_t mem,
2767 boolean_t queueit)
2768 {
2769
2770 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2771
2772 VM_PAGE_CHECK(mem);
2773 assert(VM_PAGE_WIRED(mem));
2774 assert(mem->object != VM_OBJECT_NULL);
2775 #if DEBUG
2776 vm_object_lock_assert_exclusive(mem->object);
2777 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2778 #endif
2779 if (--mem->wire_count == 0) {
2780 assert(!mem->private && !mem->fictitious);
2781 vm_page_wire_count--;
2782 assert(mem->object->wired_page_count > 0);
2783 mem->object->wired_page_count--;
2784 assert(mem->object->resident_page_count >=
2785 mem->object->wired_page_count);
2786 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2787 OSAddAtomic(+1, &vm_page_purgeable_count);
2788 assert(vm_page_purgeable_wired_count > 0);
2789 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2790 }
2791 assert(!mem->laundry);
2792 assert(mem->object != kernel_object);
2793 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2794
2795 if (queueit == TRUE) {
2796 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2797 vm_page_deactivate(mem);
2798 } else {
2799 vm_page_activate(mem);
2800 }
2801 }
2802
2803 VM_CHECK_MEMORYSTATUS;
2804
2805 }
2806 VM_PAGE_CHECK(mem);
2807 }
2808
2809 /*
2810 * vm_page_deactivate:
2811 *
2812 * Returns the given page to the inactive list,
2813 * indicating that no physical maps have access
2814 * to this page. [Used by the physical mapping system.]
2815 *
2816 * The page queues must be locked.
2817 */
2818 void
2819 vm_page_deactivate(
2820 vm_page_t m)
2821 {
2822 vm_page_deactivate_internal(m, TRUE);
2823 }
2824
2825
2826 void
2827 vm_page_deactivate_internal(
2828 vm_page_t m,
2829 boolean_t clear_hw_reference)
2830 {
2831
2832 VM_PAGE_CHECK(m);
2833 assert(m->object != kernel_object);
2834 assert(m->phys_page != vm_page_guard_addr);
2835
2836 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2837 #if DEBUG
2838 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2839 #endif
2840 /*
2841 * This page is no longer very interesting. If it was
2842 * interesting (active or inactive/referenced), then we
2843 * clear the reference bit and (re)enter it in the
2844 * inactive queue. Note wired pages should not have
2845 * their reference bit cleared.
2846 */
2847 assert ( !(m->absent && !m->unusual));
2848
2849 if (m->gobbled) { /* can this happen? */
2850 assert( !VM_PAGE_WIRED(m));
2851
2852 if (!m->private && !m->fictitious)
2853 vm_page_wire_count--;
2854 vm_page_gobble_count--;
2855 m->gobbled = FALSE;
2856 }
2857 /*
2858 * if this page is currently on the pageout queue, we can't do the
2859 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2860 * and we can't remove it manually since we would need the object lock
2861 * (which is not required here) to decrement the activity_in_progress
2862 * reference which is held on the object while the page is in the pageout queue...
2863 * just let the normal laundry processing proceed
2864 */
2865 if (m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
2866 return;
2867
2868 if (!m->absent && clear_hw_reference == TRUE)
2869 pmap_clear_reference(m->phys_page);
2870
2871 m->reference = FALSE;
2872 m->no_cache = FALSE;
2873
2874 if (!m->inactive) {
2875 VM_PAGE_QUEUES_REMOVE(m);
2876
2877 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2878 m->dirty && m->object->internal &&
2879 (m->object->purgable == VM_PURGABLE_DENY ||
2880 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2881 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2882 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2883 m->throttled = TRUE;
2884 vm_page_throttled_count++;
2885 } else {
2886 if (m->object->named && m->object->ref_count == 1) {
2887 vm_page_speculate(m, FALSE);
2888 #if DEVELOPMENT || DEBUG
2889 vm_page_speculative_recreated++;
2890 #endif
2891 } else {
2892 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2893 }
2894 }
2895 }
2896 }
2897
2898 /*
2899 * vm_page_enqueue_cleaned
2900 *
2901 * Put the page on the cleaned queue, mark it cleaned, etc.
2902 * Being on the cleaned queue (and having m->clean_queue set)
2903 * does ** NOT ** guarantee that the page is clean!
2904 *
2905 * Call with the queues lock held.
2906 */
2907
2908 void vm_page_enqueue_cleaned(vm_page_t m)
2909 {
2910 assert(m->phys_page != vm_page_guard_addr);
2911 #if DEBUG
2912 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2913 #endif
2914 assert( !(m->absent && !m->unusual));
2915
2916 if (m->gobbled) {
2917 assert( !VM_PAGE_WIRED(m));
2918 if (!m->private && !m->fictitious)
2919 vm_page_wire_count--;
2920 vm_page_gobble_count--;
2921 m->gobbled = FALSE;
2922 }
2923 /*
2924 * if this page is currently on the pageout queue, we can't do the
2925 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2926 * and we can't remove it manually since we would need the object lock
2927 * (which is not required here) to decrement the activity_in_progress
2928 * reference which is held on the object while the page is in the pageout queue...
2929 * just let the normal laundry processing proceed
2930 */
2931 if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
2932 return;
2933
2934 VM_PAGE_QUEUES_REMOVE(m);
2935
2936 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
2937 m->clean_queue = TRUE;
2938 vm_page_cleaned_count++;
2939
2940 m->inactive = TRUE;
2941 vm_page_inactive_count++;
2942 if (m->object->internal) {
2943 vm_page_pageable_internal_count++;
2944 } else {
2945 vm_page_pageable_external_count++;
2946 }
2947
2948 vm_pageout_enqueued_cleaned++;
2949 }
2950
2951 /*
2952 * vm_page_activate:
2953 *
2954 * Put the specified page on the active list (if appropriate).
2955 *
2956 * The page queues must be locked.
2957 */
2958
2959 #if CONFIG_JETSAM
2960 #if LATENCY_JETSAM
2961 extern struct vm_page jetsam_latency_page[NUM_OF_JETSAM_LATENCY_TOKENS];
2962 #endif /* LATENCY_JETSAM */
2963 #endif /* CONFIG_JETSAM */
2964
2965 void
2966 vm_page_activate(
2967 register vm_page_t m)
2968 {
2969 VM_PAGE_CHECK(m);
2970 #ifdef FIXME_4778297
2971 assert(m->object != kernel_object);
2972 #endif
2973 assert(m->phys_page != vm_page_guard_addr);
2974 #if DEBUG
2975 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2976 #endif
2977 assert( !(m->absent && !m->unusual));
2978
2979 if (m->gobbled) {
2980 assert( !VM_PAGE_WIRED(m));
2981 if (!m->private && !m->fictitious)
2982 vm_page_wire_count--;
2983 vm_page_gobble_count--;
2984 m->gobbled = FALSE;
2985 }
2986 /*
2987 * if this page is currently on the pageout queue, we can't do the
2988 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2989 * and we can't remove it manually since we would need the object lock
2990 * (which is not required here) to decrement the activity_in_progress
2991 * reference which is held on the object while the page is in the pageout queue...
2992 * just let the normal laundry processing proceed
2993 */
2994 if (m->pageout_queue || m->private || m->fictitious || m->compressor)
2995 return;
2996
2997 #if DEBUG
2998 if (m->active)
2999 panic("vm_page_activate: already active");
3000 #endif
3001
3002 if (m->speculative) {
3003 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
3004 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
3005 }
3006
3007 VM_PAGE_QUEUES_REMOVE(m);
3008
3009 if ( !VM_PAGE_WIRED(m)) {
3010
3011 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3012 m->dirty && m->object->internal &&
3013 (m->object->purgable == VM_PURGABLE_DENY ||
3014 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3015 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3016 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3017 m->throttled = TRUE;
3018 vm_page_throttled_count++;
3019 } else {
3020 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3021 m->active = TRUE;
3022 vm_page_active_count++;
3023 if (m->object->internal) {
3024 vm_page_pageable_internal_count++;
3025 } else {
3026 vm_page_pageable_external_count++;
3027 }
3028 #if LATENCY_JETSAM
3029 if (jlp_init) {
3030 uint64_t now = mach_absolute_time();
3031 uint64_t delta = now - jlp_time;
3032 clock_sec_t jl_secs = 0;
3033 clock_usec_t jl_usecs = 0;
3034 vm_page_t jlp;
3035
3036 absolutetime_to_microtime(delta, &jl_secs, &jl_usecs);
3037
3038 jl_usecs += jl_secs * USEC_PER_SEC;
3039 if (jl_usecs >= JETSAM_LATENCY_TOKEN_AGE) {
3040
3041 jlp = &jetsam_latency_page[jlp_current];
3042 if (jlp->active) {
3043 queue_remove(&vm_page_queue_active, jlp, vm_page_t, pageq);
3044 }
3045 queue_enter(&vm_page_queue_active, jlp, vm_page_t, pageq);
3046
3047 jlp->active = TRUE;
3048
3049 jlp->offset = now;
3050 jlp_time = jlp->offset;
3051
3052 if(++jlp_current == NUM_OF_JETSAM_LATENCY_TOKENS) {
3053 jlp_current = 0;
3054 }
3055
3056 }
3057 }
3058 #endif /* LATENCY_JETSAM */
3059 }
3060 m->reference = TRUE;
3061 m->no_cache = FALSE;
3062 }
3063 VM_PAGE_CHECK(m);
3064 }
3065
3066
3067 /*
3068 * vm_page_speculate:
3069 *
3070 * Put the specified page on the speculative list (if appropriate).
3071 *
3072 * The page queues must be locked.
3073 */
3074 void
3075 vm_page_speculate(
3076 vm_page_t m,
3077 boolean_t new)
3078 {
3079 struct vm_speculative_age_q *aq;
3080
3081 VM_PAGE_CHECK(m);
3082 assert(m->object != kernel_object);
3083 assert(m->phys_page != vm_page_guard_addr);
3084 #if DEBUG
3085 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3086 #endif
3087 assert( !(m->absent && !m->unusual));
3088
3089 /*
3090 * if this page is currently on the pageout queue, we can't do the
3091 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3092 * and we can't remove it manually since we would need the object lock
3093 * (which is not required here) to decrement the activity_in_progress
3094 * reference which is held on the object while the page is in the pageout queue...
3095 * just let the normal laundry processing proceed
3096 */
3097 if (m->pageout_queue || m->private || m->fictitious || m->compressor)
3098 return;
3099
3100 VM_PAGE_QUEUES_REMOVE(m);
3101
3102 if ( !VM_PAGE_WIRED(m)) {
3103 mach_timespec_t ts;
3104 clock_sec_t sec;
3105 clock_nsec_t nsec;
3106
3107 clock_get_system_nanotime(&sec, &nsec);
3108 ts.tv_sec = (unsigned int) sec;
3109 ts.tv_nsec = nsec;
3110
3111 if (vm_page_speculative_count == 0) {
3112
3113 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3114 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3115
3116 aq = &vm_page_queue_speculative[speculative_age_index];
3117
3118 /*
3119 * set the timer to begin a new group
3120 */
3121 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3122 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
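/*
 * Worked example (illustrative only; the actual value of
 * vm_page_speculative_q_age_ms is configured elsewhere): with
 * vm_page_speculative_q_age_ms == 1500, tv_sec = 1500 / 1000 = 1
 * and tv_nsec = (1500 % 1000) * 1000 * NSEC_PER_USEC = 500000000,
 * so after the ADD_MACH_TIMESPEC below the new group expires
 * 1.5 seconds from 'ts'.
 */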
3123
3124 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3125 } else {
3126 aq = &vm_page_queue_speculative[speculative_age_index];
3127
3128 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3129
3130 speculative_age_index++;
3131
3132 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3133 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3134 if (speculative_age_index == speculative_steal_index) {
3135 speculative_steal_index = speculative_age_index + 1;
3136
3137 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3138 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3139 }
3140 aq = &vm_page_queue_speculative[speculative_age_index];
3141
3142 if (!queue_empty(&aq->age_q))
3143 vm_page_speculate_ageit(aq);
3144
3145 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3146 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3147
3148 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3149 }
3150 }
3151 enqueue_tail(&aq->age_q, &m->pageq);
3152 m->speculative = TRUE;
3153 vm_page_speculative_count++;
3154 if (m->object->internal) {
3155 vm_page_pageable_internal_count++;
3156 } else {
3157 vm_page_pageable_external_count++;
3158 }
3159
3160 if (new == TRUE) {
3161 vm_object_lock_assert_exclusive(m->object);
3162
3163 m->object->pages_created++;
3164 #if DEVELOPMENT || DEBUG
3165 vm_page_speculative_created++;
3166 #endif
3167 }
3168 }
3169 VM_PAGE_CHECK(m);
3170 }
3171
3172
3173 /*
3174 * move pages from the specified aging bin to
3175 * the speculative bin that pageout_scan claims from
3176 *
3177 * The page queues must be locked.
3178 */
3179 void
3180 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3181 {
3182 struct vm_speculative_age_q *sq;
3183 vm_page_t t;
3184
3185 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3186
3187 if (queue_empty(&sq->age_q)) {
3188 sq->age_q.next = aq->age_q.next;
3189 sq->age_q.prev = aq->age_q.prev;
3190
3191 t = (vm_page_t)sq->age_q.next;
3192 t->pageq.prev = &sq->age_q;
3193
3194 t = (vm_page_t)sq->age_q.prev;
3195 t->pageq.next = &sq->age_q;
3196 } else {
3197 t = (vm_page_t)sq->age_q.prev;
3198 t->pageq.next = aq->age_q.next;
3199
3200 t = (vm_page_t)aq->age_q.next;
3201 t->pageq.prev = sq->age_q.prev;
3202
3203 t = (vm_page_t)aq->age_q.prev;
3204 t->pageq.next = &sq->age_q;
3205
3206 sq->age_q.prev = aq->age_q.prev;
3207 }
3208 queue_init(&aq->age_q);
3209 }
3210
3211
3212 void
3213 vm_page_lru(
3214 vm_page_t m)
3215 {
3216 VM_PAGE_CHECK(m);
3217 assert(m->object != kernel_object);
3218 assert(m->phys_page != vm_page_guard_addr);
3219
3220 #if DEBUG
3221 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3222 #endif
3223 /*
3224 * if this page is currently on the pageout queue, we can't do the
3225 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3226 * and we can't remove it manually since we would need the object lock
3227 * (which is not required here) to decrement the activity_in_progress
3228 * reference which is held on the object while the page is in the pageout queue...
3229 * just let the normal laundry processing proceed
3230 */
3231 if (m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
3232 return;
3233
3234 m->no_cache = FALSE;
3235
3236 VM_PAGE_QUEUES_REMOVE(m);
3237
3238 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3239 }
3240
3241
3242 void
3243 vm_page_reactivate_all_throttled(void)
3244 {
3245 vm_page_t first_throttled, last_throttled;
3246 vm_page_t first_active;
3247 vm_page_t m;
3248 int extra_active_count;
3249 int extra_internal_count, extra_external_count;
3250
3251 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3252 return;
3253
3254 extra_active_count = 0;
3255 extra_internal_count = 0;
3256 extra_external_count = 0;
3257 vm_page_lock_queues();
3258 if (! queue_empty(&vm_page_queue_throttled)) {
3259 /*
3260 * Switch "throttled" pages to "active".
3261 */
3262 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3263 VM_PAGE_CHECK(m);
3264 assert(m->throttled);
3265 assert(!m->active);
3266 assert(!m->inactive);
3267 assert(!m->speculative);
3268 assert(!VM_PAGE_WIRED(m));
3269
3270 extra_active_count++;
3271 if (m->object->internal) {
3272 extra_internal_count++;
3273 } else {
3274 extra_external_count++;
3275 }
3276
3277 m->throttled = FALSE;
3278 m->active = TRUE;
3279 VM_PAGE_CHECK(m);
3280 }
3281
3282 /*
3283 * Transfer the entire throttled queue to the regular LRU page queues.
3284 * We insert it at the head of the active queue, so that these pages
3285 * get re-evaluated by the LRU algorithm first, since they've been
3286 * completely out of it until now.
3287 */
3288 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3289 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3290 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3291 if (queue_empty(&vm_page_queue_active)) {
3292 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3293 } else {
3294 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3295 }
3296 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3297 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3298 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3299
3300 #if DEBUG
3301 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3302 #endif
3303 queue_init(&vm_page_queue_throttled);
3304 /*
3305 * Adjust the global page counts.
3306 */
3307 vm_page_active_count += extra_active_count;
3308 vm_page_pageable_internal_count += extra_internal_count;
3309 vm_page_pageable_external_count += extra_external_count;
3310 vm_page_throttled_count = 0;
3311 }
3312 assert(vm_page_throttled_count == 0);
3313 assert(queue_empty(&vm_page_queue_throttled));
3314 vm_page_unlock_queues();
3315 }
3316
3317
3318 /*
3319 * move pages from the indicated local queue to the global active queue
3320 * it's ok to fail if we're below the hard limit and force == FALSE
3321 * the nolocks == TRUE case is to allow this function to be run on
3322 * the hibernate path
3323 */
3324
3325 void
3326 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3327 {
3328 struct vpl *lq;
3329 vm_page_t first_local, last_local;
3330 vm_page_t first_active;
3331 vm_page_t m;
3332 uint32_t count = 0;
3333
3334 if (vm_page_local_q == NULL)
3335 return;
3336
3337 lq = &vm_page_local_q[lid].vpl_un.vpl;
3338
3339 if (nolocks == FALSE) {
3340 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3341 if ( !vm_page_trylockspin_queues())
3342 return;
3343 } else
3344 vm_page_lockspin_queues();
3345
3346 VPL_LOCK(&lq->vpl_lock);
3347 }
3348 if (lq->vpl_count) {
3349 /*
3350 * Switch "local" pages to "active".
3351 */
3352 assert(!queue_empty(&lq->vpl_queue));
3353
3354 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3355 VM_PAGE_CHECK(m);
3356 assert(m->local);
3357 assert(!m->active);
3358 assert(!m->inactive);
3359 assert(!m->speculative);
3360 assert(!VM_PAGE_WIRED(m));
3361 assert(!m->throttled);
3362 assert(!m->fictitious);
3363
3364 if (m->local_id != lid)
3365 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3366
3367 m->local_id = 0;
3368 m->local = FALSE;
3369 m->active = TRUE;
3370 VM_PAGE_CHECK(m);
3371
3372 count++;
3373 }
3374 if (count != lq->vpl_count)
3375 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3376
3377 /*
3378 * Transfer the entire local queue to the regular LRU page queues.
3379 */
3380 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3381 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3382 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3383
3384 if (queue_empty(&vm_page_queue_active)) {
3385 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3386 } else {
3387 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3388 }
3389 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3390 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3391 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3392
3393 queue_init(&lq->vpl_queue);
3394 /*
3395 * Adjust the global page counts.
3396 */
3397 vm_page_active_count += lq->vpl_count;
3398 vm_page_pageable_internal_count += lq->vpl_internal_count;
3399 vm_page_pageable_external_count += lq->vpl_external_count;
3400 lq->vpl_count = 0;
3401 lq->vpl_internal_count = 0;
3402 lq->vpl_external_count = 0;
3403 }
3404 assert(queue_empty(&lq->vpl_queue));
3405
3406 if (nolocks == FALSE) {
3407 VPL_UNLOCK(&lq->vpl_lock);
3408 vm_page_unlock_queues();
3409 }
3410 }
3411
3412 /*
3413 * vm_page_part_zero_fill:
3414 *
3415 * Zero-fill a part of the page.
3416 */
3417 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3418 void
3419 vm_page_part_zero_fill(
3420 vm_page_t m,
3421 vm_offset_t m_pa,
3422 vm_size_t len)
3423 {
3424
3425 #if 0
3426 /*
3427 * we don't hold the page queue lock
3428 * so this check isn't safe to make
3429 */
3430 VM_PAGE_CHECK(m);
3431 #endif
3432
3433 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3434 pmap_zero_part_page(m->phys_page, m_pa, len);
3435 #else
3436 vm_page_t tmp;
3437 while (1) {
3438 tmp = vm_page_grab();
3439 if (tmp == VM_PAGE_NULL) {
3440 vm_page_wait(THREAD_UNINT);
3441 continue;
3442 }
3443 break;
3444 }
3445 vm_page_zero_fill(tmp);
3446 if(m_pa != 0) {
3447 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3448 }
3449 if((m_pa + len) < PAGE_SIZE) {
3450 vm_page_part_copy(m, m_pa + len, tmp,
3451 m_pa + len, PAGE_SIZE - (m_pa + len));
3452 }
3453 vm_page_copy(tmp,m);
3454 VM_PAGE_FREE(tmp);
3455 #endif
3456
3457 }
3458
3459 /*
3460 * vm_page_zero_fill:
3461 *
3462 * Zero-fill the specified page.
3463 */
3464 void
3465 vm_page_zero_fill(
3466 vm_page_t m)
3467 {
3468 XPR(XPR_VM_PAGE,
3469 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3470 m->object, m->offset, m, 0,0);
3471 #if 0
3472 /*
3473 * we don't hold the page queue lock
3474 * so this check isn't safe to make
3475 */
3476 VM_PAGE_CHECK(m);
3477 #endif
3478
3479 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3480 pmap_zero_page(m->phys_page);
3481 }
3482
3483 /*
3484 * vm_page_part_copy:
3485 *
3486 * copy part of one page to another
3487 */
3488
3489 void
3490 vm_page_part_copy(
3491 vm_page_t src_m,
3492 vm_offset_t src_pa,
3493 vm_page_t dst_m,
3494 vm_offset_t dst_pa,
3495 vm_size_t len)
3496 {
3497 #if 0
3498 /*
3499 * we don't hold the page queue lock
3500 * so this check isn't safe to make
3501 */
3502 VM_PAGE_CHECK(src_m);
3503 VM_PAGE_CHECK(dst_m);
3504 #endif
3505 pmap_copy_part_page(src_m->phys_page, src_pa,
3506 dst_m->phys_page, dst_pa, len);
3507 }
3508
3509 /*
3510 * vm_page_copy:
3511 *
3512 * Copy one page to another
3513 *
3514 * ENCRYPTED SWAP:
3515 * The source page should not be encrypted. The caller should
3516 * make sure the page is decrypted first, if necessary.
3517 */
3518
3519 int vm_page_copy_cs_validations = 0;
3520 int vm_page_copy_cs_tainted = 0;
3521
3522 void
3523 vm_page_copy(
3524 vm_page_t src_m,
3525 vm_page_t dest_m)
3526 {
3527 XPR(XPR_VM_PAGE,
3528 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3529 src_m->object, src_m->offset,
3530 dest_m->object, dest_m->offset,
3531 0);
3532 #if 0
3533 /*
3534 * we don't hold the page queue lock
3535 * so this check isn't safe to make
3536 */
3537 VM_PAGE_CHECK(src_m);
3538 VM_PAGE_CHECK(dest_m);
3539 #endif
3540 vm_object_lock_assert_held(src_m->object);
3541
3542 /*
3543 * ENCRYPTED SWAP:
3544 * The source page should not be encrypted at this point.
3545 * The destination page will therefore not contain encrypted
3546 * data after the copy.
3547 */
3548 if (src_m->encrypted) {
3549 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3550 }
3551 dest_m->encrypted = FALSE;
3552
3553 if (src_m->object != VM_OBJECT_NULL &&
3554 src_m->object->code_signed) {
3555 /*
3556 * We're copying a page from a code-signed object.
3557 * Whoever ends up mapping the copy page might care about
3558 * the original page's integrity, so let's validate the
3559 * source page now.
3560 */
3561 vm_page_copy_cs_validations++;
3562 vm_page_validate_cs(src_m);
3563 }
3564
3565 if (vm_page_is_slideable(src_m)) {
3566 boolean_t was_busy = src_m->busy;
3567 src_m->busy = TRUE;
3568 (void) vm_page_slide(src_m, 0);
3569 assert(src_m->busy);
3570 if (!was_busy) {
3571 PAGE_WAKEUP_DONE(src_m);
3572 }
3573 }
3574
3575 /*
3576 * Propagate the cs_tainted bit to the copy page. Do not propagate
3577 * the cs_validated bit.
3578 */
3579 dest_m->cs_tainted = src_m->cs_tainted;
3580 if (dest_m->cs_tainted) {
3581 vm_page_copy_cs_tainted++;
3582 }
3583 dest_m->slid = src_m->slid;
3584 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3585 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3586 }
3587
3588 #if MACH_ASSERT
3589 static void
3590 _vm_page_print(
3591 vm_page_t p)
3592 {
3593 printf("vm_page %p: \n", p);
3594 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3595 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3596 printf(" next=%p\n", p->next);
3597 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3598 printf(" wire_count=%u\n", p->wire_count);
3599
3600 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3601 (p->local ? "" : "!"),
3602 (p->inactive ? "" : "!"),
3603 (p->active ? "" : "!"),
3604 (p->pageout_queue ? "" : "!"),
3605 (p->speculative ? "" : "!"),
3606 (p->laundry ? "" : "!"));
3607 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3608 (p->free ? "" : "!"),
3609 (p->reference ? "" : "!"),
3610 (p->gobbled ? "" : "!"),
3611 (p->private ? "" : "!"),
3612 (p->throttled ? "" : "!"));
3613 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3614 (p->busy ? "" : "!"),
3615 (p->wanted ? "" : "!"),
3616 (p->tabled ? "" : "!"),
3617 (p->fictitious ? "" : "!"),
3618 (p->pmapped ? "" : "!"),
3619 (p->wpmapped ? "" : "!"));
3620 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3621 (p->pageout ? "" : "!"),
3622 (p->absent ? "" : "!"),
3623 (p->error ? "" : "!"),
3624 (p->dirty ? "" : "!"),
3625 (p->cleaning ? "" : "!"),
3626 (p->precious ? "" : "!"),
3627 (p->clustered ? "" : "!"));
3628 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3629 (p->overwriting ? "" : "!"),
3630 (p->restart ? "" : "!"),
3631 (p->unusual ? "" : "!"),
3632 (p->encrypted ? "" : "!"),
3633 (p->encrypted_cleaning ? "" : "!"));
3634 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
3635 (p->cs_validated ? "" : "!"),
3636 (p->cs_tainted ? "" : "!"),
3637 (p->no_cache ? "" : "!"));
3638
3639 printf("phys_page=0x%x\n", p->phys_page);
3640 }
3641
3642 /*
3643 * Check that the list of pages is ordered by
3644 * ascending physical address and has no holes.
3645 */
3646 static int
3647 vm_page_verify_contiguous(
3648 vm_page_t pages,
3649 unsigned int npages)
3650 {
3651 register vm_page_t m;
3652 unsigned int page_count;
3653 vm_offset_t prev_addr;
3654
3655 prev_addr = pages->phys_page;
3656 page_count = 1;
3657 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3658 if (m->phys_page != prev_addr + 1) {
3659 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3660 m, (long)prev_addr, m->phys_page);
3661 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3662 panic("vm_page_verify_contiguous: not contiguous!");
3663 }
3664 prev_addr = m->phys_page;
3665 ++page_count;
3666 }
3667 if (page_count != npages) {
3668 printf("pages %p actual count 0x%x but requested 0x%x\n",
3669 pages, page_count, npages);
3670 panic("vm_page_verify_contiguous: count error");
3671 }
3672 return 1;
3673 }
3674
3675
3676 /*
3677 * Check the free lists for proper length etc.
3678 */
3679 static unsigned int
3680 vm_page_verify_free_list(
3681 queue_head_t *vm_page_queue,
3682 unsigned int color,
3683 vm_page_t look_for_page,
3684 boolean_t expect_page)
3685 {
3686 unsigned int npages;
3687 vm_page_t m;
3688 vm_page_t prev_m;
3689 boolean_t found_page;
3690
3691 found_page = FALSE;
3692 npages = 0;
3693 prev_m = (vm_page_t) vm_page_queue;
3694 queue_iterate(vm_page_queue,
3695 m,
3696 vm_page_t,
3697 pageq) {
3698
3699 if (m == look_for_page) {
3700 found_page = TRUE;
3701 }
3702 if ((vm_page_t) m->pageq.prev != prev_m)
3703 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3704 color, npages, m, m->pageq.prev, prev_m);
3705 if ( ! m->busy )
3706 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3707 color, npages, m);
3708 if (color != (unsigned int) -1) {
3709 if ((m->phys_page & vm_color_mask) != color)
3710 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3711 color, npages, m, m->phys_page & vm_color_mask, color);
3712 if ( ! m->free )
3713 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3714 color, npages, m);
3715 }
3716 ++npages;
3717 prev_m = m;
3718 }
3719 if (look_for_page != VM_PAGE_NULL) {
3720 unsigned int other_color;
3721
3722 if (expect_page && !found_page) {
3723 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3724 color, npages, look_for_page, look_for_page->phys_page);
3725 _vm_page_print(look_for_page);
3726 for (other_color = 0;
3727 other_color < vm_colors;
3728 other_color++) {
3729 if (other_color == color)
3730 continue;
3731 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3732 other_color, look_for_page, FALSE);
3733 }
3734 if (color == (unsigned int) -1) {
3735 vm_page_verify_free_list(&vm_lopage_queue_free,
3736 (unsigned int) -1, look_for_page, FALSE);
3737 }
3738 panic("vm_page_verify_free_list(color=%u)\n", color);
3739 }
3740 if (!expect_page && found_page) {
3741 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3742 color, npages, look_for_page, look_for_page->phys_page);
3743 }
3744 }
3745 return npages;
3746 }
3747
3748 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3749 static void
3750 vm_page_verify_free_lists( void )
3751 {
3752 unsigned int color, npages, nlopages;
3753
3754 if (! vm_page_verify_free_lists_enabled)
3755 return;
3756
3757 npages = 0;
3758
3759 lck_mtx_lock(&vm_page_queue_free_lock);
3760
3761 for( color = 0; color < vm_colors; color++ ) {
3762 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3763 color, VM_PAGE_NULL, FALSE);
3764 }
3765 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3766 (unsigned int) -1,
3767 VM_PAGE_NULL, FALSE);
3768 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3769 panic("vm_page_verify_free_lists: "
3770 "npages %u free_count %d nlopages %u lo_free_count %u",
3771 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3772
3773 lck_mtx_unlock(&vm_page_queue_free_lock);
3774 }
3775
3776 void
3777 vm_page_queues_assert(
3778 vm_page_t mem,
3779 int val)
3780 {
3781 #if DEBUG
3782 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3783 #endif
3784 if (mem->free + mem->active + mem->inactive + mem->speculative +
3785 mem->throttled + mem->pageout_queue > (val)) {
3786 _vm_page_print(mem);
3787 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3788 }
3789 if (VM_PAGE_WIRED(mem)) {
3790 assert(!mem->active);
3791 assert(!mem->inactive);
3792 assert(!mem->speculative);
3793 assert(!mem->throttled);
3794 assert(!mem->pageout_queue);
3795 }
3796 }
3797 #endif /* MACH_ASSERT */
3798
3799
3800 /*
3801 * CONTIGUOUS PAGE ALLOCATION
3802 *
3803 * Find a region large enough to contain at least n pages
3804 * of contiguous physical memory.
3805 *
3806 * This is done by traversing the vm_page_t array in a linear fashion
3807 * we assume that the vm_page_t array has the available physical pages in an
3808 * ordered, ascending list... this is currently true of all our implementations
3809 * and must remain so... there can be 'holes' in the array... we also can
3810 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3811 * which used to happen via 'vm_page_convert'... that function was no longer
3812 * being called and was removed...
3813 *
3814 * The basic flow consists of stabilizing some of the interesting state of
3815 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3816 * sweep at the beginning of the array looking for pages that meet our criteria
3817 * for a 'stealable' page... currently we are pretty conservative... if the page
3818 * meets these criteria and is physically contiguous to the previous page in the 'run'
3819 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3820 * and start to develop a new run... if at this point we've already considered
3821 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3822 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3823 * to other threads trying to acquire free pages (or move pages from q to q),
3824 * and then continue from the spot we left off... we only make 1 pass through the
3825 * array. Once we have a 'run' that is long enough, we'll go into the loop
3826 * which steals the pages from the queues they're currently on... pages on the free
3827 * queue can be stolen directly... pages that are on any of the other queues
3828 * must be removed from the object they are tabled on... this requires taking the
3829 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3830 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3831 * dump the pages we've currently stolen back to the free list, and pick up our
3832 * scan from the point where we aborted the 'current' run.
3833 *
3834 *
3835 * Requirements:
3836 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3837 *
3838 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3839 *
3840 * Algorithm: a single linear sweep of the vm_pages array, building runs of contiguous 'stealable' pages as described above.
3841 */
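
/*
 * A minimal sketch of the run detection at the core of the sweep below,
 * ignoring locking, page-state checks and the alignment/low-memory
 * constraints (the function and variable names here are illustrative only,
 * not part of the actual scan):
 *
 *	unsigned int
 *	find_run_start(ppnum_t *pnums, unsigned int count, unsigned int want)
 *	{
 *		unsigned int idx, start = 0, npages = 0;
 *		ppnum_t prevcontaddr = (ppnum_t) -2;
 *
 *		for (idx = 0; idx < count && npages < want; idx++) {
 *			if (pnums[idx] != prevcontaddr + 1) {
 *				// this page doesn't extend the current run...
 *				// start a new one here
 *				start = idx;
 *				npages = 1;
 *			} else {
 *				npages++;
 *			}
 *			prevcontaddr = pnums[idx];
 *		}
 *		return (npages == want) ? start : (unsigned int) -1;
 *	}
 *
 * The real scan additionally honors max_pnum and pnum_mask, refuses pages in
 * transient states, and periodically drops its locks after
 * MAX_CONSIDERED_BEFORE_YIELD pages to keep lock latency low.
 */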
3842
3843 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3844
3845
3846 #define RESET_STATE_OF_RUN() \
3847 MACRO_BEGIN \
3848 prevcontaddr = -2; \
3849 start_pnum = -1; \
3850 free_considered = 0; \
3851 substitute_needed = 0; \
3852 npages = 0; \
3853 MACRO_END
3854
3855 /*
3856 * Can we steal in-use (i.e. not free) pages when searching for
3857 * physically-contiguous pages ?
3858 */
3859 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3860
3861 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3862 #if DEBUG
3863 int vm_page_find_contig_debug = 0;
3864 #endif
3865
3866 static vm_page_t
3867 vm_page_find_contiguous(
3868 unsigned int contig_pages,
3869 ppnum_t max_pnum,
3870 ppnum_t pnum_mask,
3871 boolean_t wire,
3872 int flags)
3873 {
3874 vm_page_t m = NULL;
3875 ppnum_t prevcontaddr;
3876 ppnum_t start_pnum;
3877 unsigned int npages, considered, scanned;
3878 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3879 unsigned int idx_last_contig_page_found = 0;
3880 int free_considered, free_available;
3881 int substitute_needed;
3882 boolean_t wrapped;
3883 #if DEBUG
3884 clock_sec_t tv_start_sec, tv_end_sec;
3885 clock_usec_t tv_start_usec, tv_end_usec;
3886 #endif
3887 #if MACH_ASSERT
3888 int yielded = 0;
3889 int dumped_run = 0;
3890 int stolen_pages = 0;
3891 int compressed_pages = 0;
3892 #endif
3893
3894 if (contig_pages == 0)
3895 return VM_PAGE_NULL;
3896
3897 #if MACH_ASSERT
3898 vm_page_verify_free_lists();
3899 #endif
3900 #if DEBUG
3901 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3902 #endif
3903 PAGE_REPLACEMENT_ALLOWED(TRUE);
3904
3905 vm_page_lock_queues();
3906 lck_mtx_lock(&vm_page_queue_free_lock);
3907
3908 RESET_STATE_OF_RUN();
3909
3910 scanned = 0;
3911 considered = 0;
3912 free_available = vm_page_free_count - vm_page_free_reserved;
3913
3914 wrapped = FALSE;
3915
3916 if(flags & KMA_LOMEM)
3917 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3918 else
3919 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3920
3921 orig_last_idx = idx_last_contig_page_found;
3922 last_idx = orig_last_idx;
3923
3924 for (page_idx = last_idx, start_idx = last_idx;
3925 npages < contig_pages && page_idx < vm_pages_count;
3926 page_idx++) {
3927 retry:
3928 if (wrapped &&
3929 npages == 0 &&
3930 page_idx >= orig_last_idx) {
3931 /*
3932 * We're back where we started and we haven't
3933 * found any suitable contiguous range. Let's
3934 * give up.
3935 */
3936 break;
3937 }
3938 scanned++;
3939 m = &vm_pages[page_idx];
3940
3941 assert(!m->fictitious);
3942 assert(!m->private);
3943
3944 if (max_pnum && m->phys_page > max_pnum) {
3945 /* no more low pages... */
3946 break;
3947 }
3948 		if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3949 /*
3950 * not aligned
3951 */
3952 RESET_STATE_OF_RUN();
3953
3954 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3955 m->encrypted_cleaning ||
3956 m->pageout_queue || m->laundry || m->wanted ||
3957 m->cleaning || m->overwriting || m->pageout) {
3958 /*
3959 * page is in a transient state
3960 * or a state we don't want to deal
3961 * with, so don't consider it which
3962 * means starting a new run
3963 */
3964 RESET_STATE_OF_RUN();
3965
3966 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
3967 /*
3968 * page needs to be on one of our queues
3969 * or it needs to belong to the compressor pool
3970 * in order for it to be stable behind the
3971 * locks we hold at this point...
3972 * if not, don't consider it which
3973 * means starting a new run
3974 */
3975 RESET_STATE_OF_RUN();
3976
3977 } else if (!m->free && (!m->tabled || m->busy)) {
3978 /*
3979 * pages on the free list are always 'busy'
3980 * so we couldn't test for 'busy' in the check
3981 * for the transient states... pages that are
3982 * 'free' are never 'tabled', so we also couldn't
3983 * test for 'tabled'. So we check here to make
3984 * sure that a non-free page is not busy and is
3985 * tabled on an object...
3986 * if not, don't consider it which
3987 * means starting a new run
3988 */
3989 RESET_STATE_OF_RUN();
3990
3991 } else {
3992 if (m->phys_page != prevcontaddr + 1) {
3993 if ((m->phys_page & pnum_mask) != 0) {
3994 RESET_STATE_OF_RUN();
3995 goto did_consider;
3996 } else {
3997 npages = 1;
3998 start_idx = page_idx;
3999 start_pnum = m->phys_page;
4000 }
4001 } else {
4002 npages++;
4003 }
4004 prevcontaddr = m->phys_page;
4005
4006 VM_PAGE_CHECK(m);
4007 if (m->free) {
4008 free_considered++;
4009 } else {
4010 /*
4011 * This page is not free.
4012 * If we can't steal used pages,
4013 * we have to give up this run
4014 * and keep looking.
4015 * Otherwise, we might need to
4016 * move the contents of this page
4017 * into a substitute page.
4018 */
4019 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4020 if (m->pmapped || m->dirty || m->precious) {
4021 substitute_needed++;
4022 }
4023 #else
4024 RESET_STATE_OF_RUN();
4025 #endif
4026 }
4027
4028 if ((free_considered + substitute_needed) > free_available) {
4029 /*
4030 * if we let this run continue
4031 * we will end up dropping the vm_page_free_count
4032 * below the reserve limit... we need to abort
4033 * this run, but we can at least re-consider this
4034 * page... thus the jump back to 'retry'
4035 */
4036 RESET_STATE_OF_RUN();
4037
4038 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4039 considered++;
4040 goto retry;
4041 }
4042 /*
4043 * free_available == 0
4044 * so can't consider any free pages... if
4045 * we went to retry in this case, we'd
4046 * get stuck looking at the same page
4047 * w/o making any forward progress
4048 * we also want to take this path if we've already
4049 * reached our limit that controls the lock latency
4050 */
4051 }
4052 }
4053 did_consider:
4054 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
4055
4056 PAGE_REPLACEMENT_ALLOWED(FALSE);
4057
4058 lck_mtx_unlock(&vm_page_queue_free_lock);
4059 vm_page_unlock_queues();
4060
4061 mutex_pause(0);
4062
4063 PAGE_REPLACEMENT_ALLOWED(TRUE);
4064
4065 vm_page_lock_queues();
4066 lck_mtx_lock(&vm_page_queue_free_lock);
4067
4068 RESET_STATE_OF_RUN();
4069 /*
4070 * reset our free page limit since we
4071 * dropped the lock protecting the vm_page_free_queue
4072 */
4073 free_available = vm_page_free_count - vm_page_free_reserved;
4074 considered = 0;
4075 #if MACH_ASSERT
4076 yielded++;
4077 #endif
4078 goto retry;
4079 }
4080 considered++;
4081 }
4082 m = VM_PAGE_NULL;
4083
4084 if (npages != contig_pages) {
4085 if (!wrapped) {
4086 /*
4087 * We didn't find a contiguous range but we didn't
4088 * start from the very first page.
4089 * Start again from the very first page.
4090 */
4091 RESET_STATE_OF_RUN();
4092 if( flags & KMA_LOMEM)
4093 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4094 else
4095 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4096 last_idx = 0;
4097 page_idx = last_idx;
4098 wrapped = TRUE;
4099 goto retry;
4100 }
4101 lck_mtx_unlock(&vm_page_queue_free_lock);
4102 } else {
4103 vm_page_t m1;
4104 vm_page_t m2;
4105 unsigned int cur_idx;
4106 unsigned int tmp_start_idx;
4107 vm_object_t locked_object = VM_OBJECT_NULL;
4108 boolean_t abort_run = FALSE;
4109
4110 assert(page_idx - start_idx == contig_pages);
4111
4112 tmp_start_idx = start_idx;
4113
4114 /*
4115 * first pass through to pull the free pages
4116 * off of the free queue so that in case we
4117 * need substitute pages, we won't grab any
4118 * of the free pages in the run... we'll clear
4119 * the 'free' bit in the 2nd pass, and even in
4120 * an abort_run case, we'll collect all of the
4121 * free pages in this run and return them to the free list
4122 */
4123 while (start_idx < page_idx) {
4124
4125 m1 = &vm_pages[start_idx++];
4126
4127 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4128 assert(m1->free);
4129 #endif
4130
4131 if (m1->free) {
4132 unsigned int color;
4133
4134 color = m1->phys_page & vm_color_mask;
4135 #if MACH_ASSERT
4136 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
4137 #endif
4138 queue_remove(&vm_page_queue_free[color],
4139 m1,
4140 vm_page_t,
4141 pageq);
4142 m1->pageq.next = NULL;
4143 m1->pageq.prev = NULL;
4144 #if MACH_ASSERT
4145 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
4146 #endif
4147 /*
4148 * Clear the "free" bit so that this page
4149 * does not get considered for another
4150 * concurrent physically-contiguous allocation.
4151 */
4152 m1->free = FALSE;
4153 assert(m1->busy);
4154
4155 vm_page_free_count--;
4156 }
4157 }
4158 /*
4159 * adjust global freelist counts
4160 */
4161 if (vm_page_free_count < vm_page_free_count_minimum)
4162 vm_page_free_count_minimum = vm_page_free_count;
4163
4164 if( flags & KMA_LOMEM)
4165 vm_page_lomem_find_contiguous_last_idx = page_idx;
4166 else
4167 vm_page_find_contiguous_last_idx = page_idx;
4168
4169 /*
4170 * we can drop the free queue lock at this point since
4171 * we've pulled any 'free' candidates off of the list
4172 * we need it dropped so that we can do a vm_page_grab
4173 	 * when substituting for pmapped/dirty pages
4174 */
4175 lck_mtx_unlock(&vm_page_queue_free_lock);
4176
4177 start_idx = tmp_start_idx;
4178 cur_idx = page_idx - 1;
4179
4180 while (start_idx++ < page_idx) {
4181 /*
4182 * must go through the list from back to front
4183 * so that the page list is created in the
4184 * correct order - low -> high phys addresses
4185 */
4186 m1 = &vm_pages[cur_idx--];
4187
4188 assert(!m1->free);
4189
4190 if (m1->object == VM_OBJECT_NULL) {
4191 /*
4192 * page has already been removed from
4193 * the free list in the 1st pass
4194 */
4195 assert(m1->offset == (vm_object_offset_t) -1);
4196 assert(m1->busy);
4197 assert(!m1->wanted);
4198 assert(!m1->laundry);
4199 } else {
4200 vm_object_t object;
4201 int refmod;
4202 boolean_t disconnected, reusable;
4203
4204 if (abort_run == TRUE)
4205 continue;
4206
4207 object = m1->object;
4208
4209 if (object != locked_object) {
4210 if (locked_object) {
4211 vm_object_unlock(locked_object);
4212 locked_object = VM_OBJECT_NULL;
4213 }
4214 if (vm_object_lock_try(object))
4215 locked_object = object;
4216 }
4217 if (locked_object == VM_OBJECT_NULL ||
4218 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4219 m1->encrypted_cleaning ||
4220 m1->pageout_queue || m1->laundry || m1->wanted ||
4221 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
4222
4223 if (locked_object) {
4224 vm_object_unlock(locked_object);
4225 locked_object = VM_OBJECT_NULL;
4226 }
4227 tmp_start_idx = cur_idx;
4228 abort_run = TRUE;
4229 continue;
4230 }
4231
4232 disconnected = FALSE;
4233 reusable = FALSE;
4234
4235 if ((m1->reusable ||
4236 m1->object->all_reusable) &&
4237 m1->inactive &&
4238 !m1->dirty &&
4239 !m1->reference) {
4240 /* reusable page... */
4241 refmod = pmap_disconnect(m1->phys_page);
4242 disconnected = TRUE;
4243 if (refmod == 0) {
4244 /*
4245 * ... not reused: can steal
4246 * without relocating contents.
4247 */
4248 reusable = TRUE;
4249 }
4250 }
4251
4252 if ((m1->pmapped &&
4253 ! reusable) ||
4254 m1->dirty ||
4255 m1->precious) {
4256 vm_object_offset_t offset;
4257
4258 m2 = vm_page_grab();
4259
4260 if (m2 == VM_PAGE_NULL) {
4261 if (locked_object) {
4262 vm_object_unlock(locked_object);
4263 locked_object = VM_OBJECT_NULL;
4264 }
4265 tmp_start_idx = cur_idx;
4266 abort_run = TRUE;
4267 continue;
4268 }
4269 if (! disconnected) {
4270 if (m1->pmapped)
4271 refmod = pmap_disconnect(m1->phys_page);
4272 else
4273 refmod = 0;
4274 }
4275
4276 /* copy the page's contents */
4277 pmap_copy_page(m1->phys_page, m2->phys_page);
4278 /* copy the page's state */
4279 assert(!VM_PAGE_WIRED(m1));
4280 assert(!m1->free);
4281 assert(!m1->pageout_queue);
4282 assert(!m1->laundry);
4283 m2->reference = m1->reference;
4284 assert(!m1->gobbled);
4285 assert(!m1->private);
4286 m2->no_cache = m1->no_cache;
4287 m2->xpmapped = m1->xpmapped;
4288 assert(!m1->busy);
4289 assert(!m1->wanted);
4290 assert(!m1->fictitious);
4291 m2->pmapped = m1->pmapped; /* should flush cache ? */
4292 m2->wpmapped = m1->wpmapped;
4293 assert(!m1->pageout);
4294 m2->absent = m1->absent;
4295 m2->error = m1->error;
4296 m2->dirty = m1->dirty;
4297 assert(!m1->cleaning);
4298 m2->precious = m1->precious;
4299 m2->clustered = m1->clustered;
4300 assert(!m1->overwriting);
4301 m2->restart = m1->restart;
4302 m2->unusual = m1->unusual;
4303 m2->encrypted = m1->encrypted;
4304 assert(!m1->encrypted_cleaning);
4305 m2->cs_validated = m1->cs_validated;
4306 m2->cs_tainted = m1->cs_tainted;
4307
4308 /*
4309 * If m1 had really been reusable,
4310 * we would have just stolen it, so
4311 				 * let's not propagate its "reusable"
4312 * bit and assert that m2 is not
4313 * marked as "reusable".
4314 */
4315 // m2->reusable = m1->reusable;
4316 assert(!m2->reusable);
4317
4318 assert(!m1->lopage);
4319 m2->slid = m1->slid;
4320 m2->was_dirty = m1->was_dirty;
4321 m2->compressor = m1->compressor;
4322
4323 /*
4324 * page may need to be flushed if
4325 * it is marshalled into a UPL
4326 * that is going to be used by a device
4327 * that doesn't support coherency
4328 */
4329 m2->written_by_kernel = TRUE;
4330
4331 /*
4332 * make sure we clear the ref/mod state
4333 * from the pmap layer... else we risk
4334 * inheriting state from the last time
4335 * this page was used...
4336 */
4337 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4338
4339 if (refmod & VM_MEM_REFERENCED)
4340 m2->reference = TRUE;
4341 if (refmod & VM_MEM_MODIFIED) {
4342 SET_PAGE_DIRTY(m2, TRUE);
4343 }
4344 offset = m1->offset;
4345
4346 /*
4347 * completely cleans up the state
4348 * of the page so that it is ready
4349 * to be put onto the free list, or
4350 * for this purpose it looks like it
4351 * just came off of the free list
4352 */
4353 vm_page_free_prepare(m1);
4354
4355 /*
4356 * now put the substitute page
4357 * on the object
4358 */
4359 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4360
4361 if (m2->compressor) {
4362 m2->pmapped = TRUE;
4363 m2->wpmapped = TRUE;
4364
4365 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4366 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4367 #if MACH_ASSERT
4368 compressed_pages++;
4369 #endif
4370 } else {
4371 if (m2->reference)
4372 vm_page_activate(m2);
4373 else
4374 vm_page_deactivate(m2);
4375 }
4376 PAGE_WAKEUP_DONE(m2);
4377
4378 } else {
4379 assert(!m1->compressor);
4380
4381 /*
4382 * completely cleans up the state
4383 * of the page so that it is ready
4384 * to be put onto the free list, or
4385 * for this purpose it looks like it
4386 * just came off of the free list
4387 */
4388 vm_page_free_prepare(m1);
4389 }
4390 #if MACH_ASSERT
4391 stolen_pages++;
4392 #endif
4393 }
4394 m1->pageq.next = (queue_entry_t) m;
4395 m1->pageq.prev = NULL;
4396 m = m1;
4397 }
4398 if (locked_object) {
4399 vm_object_unlock(locked_object);
4400 locked_object = VM_OBJECT_NULL;
4401 }
4402
4403 if (abort_run == TRUE) {
4404 if (m != VM_PAGE_NULL) {
4405 vm_page_free_list(m, FALSE);
4406 }
4407 #if MACH_ASSERT
4408 dumped_run++;
4409 #endif
4410 /*
4411 * want the index of the last
4412 * page in this run that was
4413 * successfully 'stolen', so back
4414 * it up 1 for the auto-decrement on use
4415 * and 1 more to bump back over this page
4416 */
4417 page_idx = tmp_start_idx + 2;
4418 if (page_idx >= vm_pages_count) {
4419 if (wrapped)
4420 goto done_scanning;
4421 page_idx = last_idx = 0;
4422 wrapped = TRUE;
4423 }
4424 abort_run = FALSE;
4425
4426 /*
4427 * We didn't find a contiguous range but we didn't
4428 * start from the very first page.
4429 * Start again from the very first page.
4430 */
4431 RESET_STATE_OF_RUN();
4432
4433 if( flags & KMA_LOMEM)
4434 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4435 else
4436 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4437
4438 last_idx = page_idx;
4439
4440 lck_mtx_lock(&vm_page_queue_free_lock);
4441 /*
4442 * reset our free page limit since we
4443 * dropped the lock protecting the vm_page_free_queue
4444 */
4445 free_available = vm_page_free_count - vm_page_free_reserved;
4446 goto retry;
4447 }
4448
4449 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4450
4451 if (wire == TRUE)
4452 m1->wire_count++;
4453 else
4454 m1->gobbled = TRUE;
4455 }
4456 if (wire == FALSE)
4457 vm_page_gobble_count += npages;
4458
4459 /*
4460 * gobbled pages are also counted as wired pages
4461 */
4462 vm_page_wire_count += npages;
4463
4464 assert(vm_page_verify_contiguous(m, npages));
4465 }
4466 done_scanning:
4467 PAGE_REPLACEMENT_ALLOWED(FALSE);
4468
4469 vm_page_unlock_queues();
4470
4471 #if DEBUG
4472 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4473
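	/*
	 * normalize the (sec, usec) elapsed time: borrow a second if the
	 * microseconds underflow, carry one if they overflow
	 */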
4474 tv_end_sec -= tv_start_sec;
4475 if (tv_end_usec < tv_start_usec) {
4476 tv_end_sec--;
4477 tv_end_usec += 1000000;
4478 }
4479 tv_end_usec -= tv_start_usec;
4480 if (tv_end_usec >= 1000000) {
4481 tv_end_sec++;
4482 		tv_end_usec -= 1000000;
4483 }
4484 if (vm_page_find_contig_debug) {
4485 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4486 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4487 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4488 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
4489 }
4490
4491 #endif
4492 #if MACH_ASSERT
4493 vm_page_verify_free_lists();
4494 #endif
4495 return m;
4496 }
4497
4498 /*
4499 * Allocate a list of contiguous, wired pages.
4500 */
4501 kern_return_t
4502 cpm_allocate(
4503 vm_size_t size,
4504 vm_page_t *list,
4505 ppnum_t max_pnum,
4506 ppnum_t pnum_mask,
4507 boolean_t wire,
4508 int flags)
4509 {
4510 vm_page_t pages;
4511 unsigned int npages;
4512
4513 if (size % PAGE_SIZE != 0)
4514 return KERN_INVALID_ARGUMENT;
4515
4516 npages = (unsigned int) (size / PAGE_SIZE);
4517 if (npages != size / PAGE_SIZE) {
4518 /* 32-bit overflow */
4519 return KERN_INVALID_ARGUMENT;
4520 }
4521
4522 /*
4523 * Obtain a pointer to a subset of the free
4524 * list large enough to satisfy the request;
4525 * the region will be physically contiguous.
4526 */
4527 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4528
4529 if (pages == VM_PAGE_NULL)
4530 return KERN_NO_SPACE;
4531 /*
4532 * determine need for wakeups
4533 */
4534 if ((vm_page_free_count < vm_page_free_min) ||
4535 ((vm_page_free_count < vm_page_free_target) &&
4536 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4537 thread_wakeup((event_t) &vm_page_free_wanted);
4538
4539 VM_CHECK_MEMORYSTATUS;
4540
4541 /*
4542 * The CPM pages should now be available and
4543 * ordered by ascending physical address.
4544 */
4545 assert(vm_page_verify_contiguous(pages, npages));
4546
4547 *list = pages;
4548 return KERN_SUCCESS;
4549 }
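
/*
 * A minimal caller sketch (hypothetical, for illustration only): ask for 16
 * wired, physically contiguous pages with no placement constraints.
 *
 *	vm_page_t pages;
 *	kern_return_t kr;
 *
 *	kr = cpm_allocate(16 * PAGE_SIZE,	// size must be a multiple of PAGE_SIZE
 *			  &pages,		// list returned in ascending physical order
 *			  0,			// max_pnum 0: no upper bound on physical page
 *			  0,			// pnum_mask 0: no alignment requirement
 *			  TRUE,			// wire the pages
 *			  0);			// flags: KMA_LOMEM not needed
 *	if (kr != KERN_SUCCESS)
 *		return kr;			// KERN_NO_SPACE if no run could be found
 */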
4550
4551
4552 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4553
4554 /*
4555 * when working on a 'run' of pages, it is necessary to hold
4556 * the vm_page_queue_lock (a hot global lock) for certain operations
4557 * on the page... however, the majority of the work can be done
4558 * while merely holding the object lock... in fact there are certain
4559 * collections of pages that don't require any work brokered by the
4560 * vm_page_queue_lock... to mitigate the time spent behind the global
4561 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4562 * while doing all of the work that doesn't require the vm_page_queue_lock...
4563 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4564 * necessary work for each page... we will grab the busy bit on the page
4565 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4566 * if it can't immediately take the vm_page_queue_lock in order to compete
4567 * for the locks in the same order that vm_pageout_scan takes them.
4568 * the operation names are modeled after the names of the routines that
4569 * need to be called in order to make the changes very obvious in the
4570 * original loop
4571 */
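
/*
 * A minimal sketch of the calling pattern described above (the page-selection
 * loop is hypothetical; the dw_ fields and DW_ masks are the ones handled
 * below): batch work up while holding only the object lock, then make one
 * trip through the vm_page_queue_lock.
 *
 *	struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
 *	struct vm_page_delayed_work *dwp = &dw_array[0];
 *	int dw_count = 0;
 *
 *	vm_object_lock(object);
 *	for each page m of interest in object {
 *		... object-lock-only work on m ...
 *		dwp->dw_m = m;
 *		dwp->dw_mask = DW_clear_busy | DW_PAGE_WAKEUP;
 *		dwp++;
 *		dw_count++;
 *		if (dw_count >= vm_max_delayed_work_limit) {
 *			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 *			dwp = &dw_array[0];
 *			dw_count = 0;
 *		}
 *	}
 *	if (dw_count)
 *		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 *	vm_object_unlock(object);
 */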
4572
4573 void
4574 vm_page_do_delayed_work(
4575 vm_object_t object,
4576 struct vm_page_delayed_work *dwp,
4577 int dw_count)
4578 {
4579 int j;
4580 vm_page_t m;
4581 vm_page_t local_free_q = VM_PAGE_NULL;
4582
4583 /*
4584 * pageout_scan takes the vm_page_lock_queues first
4585 * then tries for the object lock... to avoid what
4586 * is effectively a lock inversion, we'll go to the
4587 * trouble of taking them in that same order... otherwise
4588 * if this object contains the majority of the pages resident
4589 * in the UBC (or a small set of large objects actively being
4590 * worked on contain the majority of the pages), we could
4591 * cause the pageout_scan thread to 'starve' in its attempt
4592 * to find pages to move to the free queue, since it has to
4593 * successfully acquire the object lock of any candidate page
4594 * before it can steal/clean it.
4595 */
4596 if (!vm_page_trylockspin_queues()) {
4597 vm_object_unlock(object);
4598
4599 vm_page_lockspin_queues();
4600
4601 for (j = 0; ; j++) {
4602 if (!vm_object_lock_avoid(object) &&
4603 _vm_object_lock_try(object))
4604 break;
4605 vm_page_unlock_queues();
4606 mutex_pause(j);
4607 vm_page_lockspin_queues();
4608 }
4609 }
4610 for (j = 0; j < dw_count; j++, dwp++) {
4611
4612 m = dwp->dw_m;
4613
4614 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4615 vm_pageout_throttle_up(m);
4616
4617 if (dwp->dw_mask & DW_vm_page_wire)
4618 vm_page_wire(m);
4619 else if (dwp->dw_mask & DW_vm_page_unwire) {
4620 boolean_t queueit;
4621
4622 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4623
4624 vm_page_unwire(m, queueit);
4625 }
4626 if (dwp->dw_mask & DW_vm_page_free) {
4627 vm_page_free_prepare_queues(m);
4628
4629 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4630 /*
4631 * Add this page to our list of reclaimed pages,
4632 * to be freed later.
4633 */
4634 m->pageq.next = (queue_entry_t) local_free_q;
4635 local_free_q = m;
4636 } else {
4637 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4638 vm_page_deactivate_internal(m, FALSE);
4639 else if (dwp->dw_mask & DW_vm_page_activate) {
4640 if (m->active == FALSE) {
4641 vm_page_activate(m);
4642 }
4643 }
4644 else if (dwp->dw_mask & DW_vm_page_speculate)
4645 vm_page_speculate(m, TRUE);
4646 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4647 /*
4648 * if we didn't hold the object lock and did this,
4649 * we might disconnect the page, then someone might
4650 * soft fault it back in, then we would put it on the
4651 * cleaned queue, and so we would have a referenced (maybe even dirty)
4652 * page on that queue, which we don't want
4653 */
4654 int refmod_state = pmap_disconnect(m->phys_page);
4655
4656 if ((refmod_state & VM_MEM_REFERENCED)) {
4657 /*
4658 * this page has been touched since it got cleaned; let's activate it
4659 * if it hasn't already been
4660 */
4661 vm_pageout_enqueued_cleaned++;
4662 vm_pageout_cleaned_reactivated++;
4663 vm_pageout_cleaned_commit_reactivated++;
4664
4665 if (m->active == FALSE)
4666 vm_page_activate(m);
4667 } else {
4668 m->reference = FALSE;
4669 vm_page_enqueue_cleaned(m);
4670 }
4671 }
4672 else if (dwp->dw_mask & DW_vm_page_lru)
4673 vm_page_lru(m);
4674 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4675 if ( !m->pageout_queue)
4676 VM_PAGE_QUEUES_REMOVE(m);
4677 }
4678 if (dwp->dw_mask & DW_set_reference)
4679 m->reference = TRUE;
4680 else if (dwp->dw_mask & DW_clear_reference)
4681 m->reference = FALSE;
4682
4683 if (dwp->dw_mask & DW_move_page) {
4684 if ( !m->pageout_queue) {
4685 VM_PAGE_QUEUES_REMOVE(m);
4686
4687 assert(m->object != kernel_object);
4688
4689 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4690 }
4691 }
4692 if (dwp->dw_mask & DW_clear_busy)
4693 m->busy = FALSE;
4694
4695 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4696 PAGE_WAKEUP(m);
4697 }
4698 }
4699 vm_page_unlock_queues();
4700
4701 if (local_free_q)
4702 vm_page_free_list(local_free_q, TRUE);
4703
4704 VM_CHECK_MEMORYSTATUS;
4705
4706 }
4707
4708 kern_return_t
4709 vm_page_alloc_list(
4710 int page_count,
4711 int flags,
4712 vm_page_t *list)
4713 {
4714 vm_page_t lo_page_list = VM_PAGE_NULL;
4715 vm_page_t mem;
4716 int i;
4717
4718 if ( !(flags & KMA_LOMEM))
4719 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4720
4721 for (i = 0; i < page_count; i++) {
4722
4723 mem = vm_page_grablo();
4724
4725 if (mem == VM_PAGE_NULL) {
4726 if (lo_page_list)
4727 vm_page_free_list(lo_page_list, FALSE);
4728
4729 *list = VM_PAGE_NULL;
4730
4731 return (KERN_RESOURCE_SHORTAGE);
4732 }
4733 mem->pageq.next = (queue_entry_t) lo_page_list;
4734 lo_page_list = mem;
4735 }
4736 *list = lo_page_list;
4737
4738 return (KERN_SUCCESS);
4739 }
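
/*
 * A minimal caller sketch (hypothetical): grab four low-memory pages, walking
 * the returned list with the accessors below.
 *
 *	vm_page_t list, p;
 *
 *	if (vm_page_alloc_list(4, KMA_LOMEM, &list) != KERN_SUCCESS)
 *		return KERN_RESOURCE_SHORTAGE;
 *	for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p))
 *		... // physical page number available via vm_page_get_phys_page(p)
 */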
4740
4741 void
4742 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4743 {
4744 page->offset = offset;
4745 }
4746
4747 vm_page_t
4748 vm_page_get_next(vm_page_t page)
4749 {
4750 return ((vm_page_t) page->pageq.next);
4751 }
4752
4753 vm_object_offset_t
4754 vm_page_get_offset(vm_page_t page)
4755 {
4756 return (page->offset);
4757 }
4758
4759 ppnum_t
4760 vm_page_get_phys_page(vm_page_t page)
4761 {
4762 return (page->phys_page);
4763 }
4764
4765
4766 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4767
4768 #if HIBERNATION
4769
4770 static vm_page_t hibernate_gobble_queue;
4771
4772 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4773
4774 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4775 static int hibernate_flush_dirty_pages(int);
4776 static int hibernate_flush_queue(queue_head_t *, int);
4777
4778 void hibernate_flush_wait(void);
4779 void hibernate_mark_in_progress(void);
4780 void hibernate_clear_in_progress(void);
4781
4782 void hibernate_free_range(int, int);
4783 void hibernate_hash_insert_page(vm_page_t);
4784 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
4785 void hibernate_rebuild_vm_structs(void);
4786 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
4787 ppnum_t hibernate_lookup_paddr(unsigned int);
4788
4789 struct hibernate_statistics {
4790 int hibernate_considered;
4791 int hibernate_reentered_on_q;
4792 int hibernate_found_dirty;
4793 int hibernate_skipped_cleaning;
4794 int hibernate_skipped_transient;
4795 int hibernate_skipped_precious;
4796 int hibernate_skipped_external;
4797 int hibernate_queue_nolock;
4798 int hibernate_queue_paused;
4799 int hibernate_throttled;
4800 int hibernate_throttle_timeout;
4801 int hibernate_drained;
4802 int hibernate_drain_timeout;
4803 int cd_lock_failed;
4804 int cd_found_precious;
4805 int cd_found_wired;
4806 int cd_found_busy;
4807 int cd_found_unusual;
4808 int cd_found_cleaning;
4809 int cd_found_laundry;
4810 int cd_found_dirty;
4811 int cd_found_xpmapped;
4812 int cd_skipped_xpmapped;
4813 int cd_local_free;
4814 int cd_total_free;
4815 int cd_vm_page_wire_count;
4816 int cd_vm_struct_pages_unneeded;
4817 int cd_pages;
4818 int cd_discarded;
4819 int cd_count_wire;
4820 } hibernate_stats;
4821
4822
4823 /*
4824 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
4825 * so that we don't overrun the estimated image size, which would
4826 * result in a hibernation failure.
4827 */
4828 #define HIBERNATE_XPMAPPED_LIMIT 40000
4829
4830
4831 static int
4832 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4833 {
4834 wait_result_t wait_result;
4835
4836 vm_page_lock_queues();
4837
4838 while ( !queue_empty(&q->pgo_pending) ) {
4839
4840 q->pgo_draining = TRUE;
4841
4842 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4843
4844 vm_page_unlock_queues();
4845
4846 wait_result = thread_block(THREAD_CONTINUE_NULL);
4847
4848 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
4849 hibernate_stats.hibernate_drain_timeout++;
4850
4851 if (q == &vm_pageout_queue_external)
4852 return (0);
4853
4854 return (1);
4855 }
4856 vm_page_lock_queues();
4857
4858 hibernate_stats.hibernate_drained++;
4859 }
4860 vm_page_unlock_queues();
4861
4862 return (0);
4863 }
4864
4865
4866 boolean_t hibernate_skip_external = FALSE;
4867
4868 static int
4869 hibernate_flush_queue(queue_head_t *q, int qcount)
4870 {
4871 vm_page_t m;
4872 vm_object_t l_object = NULL;
4873 vm_object_t m_object = NULL;
4874 int refmod_state = 0;
4875 int try_failed_count = 0;
4876 int retval = 0;
4877 int current_run = 0;
4878 struct vm_pageout_queue *iq;
4879 struct vm_pageout_queue *eq;
4880 struct vm_pageout_queue *tq;
4881
4882
4883 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4884
4885 iq = &vm_pageout_queue_internal;
4886 eq = &vm_pageout_queue_external;
4887
4888 vm_page_lock_queues();
4889
4890 while (qcount && !queue_empty(q)) {
4891
4892 if (current_run++ == 1000) {
4893 if (hibernate_should_abort()) {
4894 retval = 1;
4895 break;
4896 }
4897 current_run = 0;
4898 }
4899
4900 m = (vm_page_t) queue_first(q);
4901 m_object = m->object;
4902
4903 /*
4904 * check to see if we currently are working
4905 * with the same object... if so, we've
4906 * already got the lock
4907 */
4908 if (m_object != l_object) {
4909 /*
4910 			 * the object associated with the candidate page is
4911 * different from the one we were just working
4912 * with... dump the lock if we still own it
4913 */
4914 if (l_object != NULL) {
4915 vm_object_unlock(l_object);
4916 l_object = NULL;
4917 }
4918 /*
4919 			 * Try to lock object; since we've already got the
4920 * page queues lock, we can only 'try' for this one.
4921 * if the 'try' fails, we need to do a mutex_pause
4922 * to allow the owner of the object lock a chance to
4923 * run...
4924 */
4925 if ( !vm_object_lock_try_scan(m_object)) {
4926
4927 if (try_failed_count > 20) {
4928 hibernate_stats.hibernate_queue_nolock++;
4929
4930 goto reenter_pg_on_q;
4931 }
4932 vm_pageout_scan_wants_object = m_object;
4933
4934 vm_page_unlock_queues();
4935 mutex_pause(try_failed_count++);
4936 vm_page_lock_queues();
4937
4938 hibernate_stats.hibernate_queue_paused++;
4939 continue;
4940 } else {
4941 l_object = m_object;
4942 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4943 }
4944 }
4945 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
4946 /*
4947 * page is not to be cleaned
4948 * put it back on the head of its queue
4949 */
4950 if (m->cleaning)
4951 hibernate_stats.hibernate_skipped_cleaning++;
4952 else
4953 hibernate_stats.hibernate_skipped_transient++;
4954
4955 goto reenter_pg_on_q;
4956 }
4957 if (m_object->copy == VM_OBJECT_NULL) {
4958 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4959 /*
4960 * let the normal hibernate image path
4961 * deal with these
4962 */
4963 goto reenter_pg_on_q;
4964 }
4965 }
4966 if ( !m->dirty && m->pmapped) {
4967 refmod_state = pmap_get_refmod(m->phys_page);
4968
4969 if ((refmod_state & VM_MEM_MODIFIED)) {
4970 SET_PAGE_DIRTY(m, FALSE);
4971 }
4972 } else
4973 refmod_state = 0;
4974
4975 if ( !m->dirty) {
4976 /*
4977 * page is not to be cleaned
4978 * put it back on the head of its queue
4979 */
4980 if (m->precious)
4981 hibernate_stats.hibernate_skipped_precious++;
4982
4983 goto reenter_pg_on_q;
4984 }
4985
4986 if (hibernate_skip_external == TRUE && !m_object->internal) {
4987
4988 hibernate_stats.hibernate_skipped_external++;
4989
4990 goto reenter_pg_on_q;
4991 }
4992 tq = NULL;
4993
4994 if (m_object->internal) {
4995 if (VM_PAGE_Q_THROTTLED(iq))
4996 tq = iq;
4997 } else if (VM_PAGE_Q_THROTTLED(eq))
4998 tq = eq;
4999
5000 if (tq != NULL) {
5001 wait_result_t wait_result;
5002 int wait_count = 5;
5003
5004 if (l_object != NULL) {
5005 vm_object_unlock(l_object);
5006 l_object = NULL;
5007 }
5008 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5009
5010 while (retval == 0) {
5011
5012 tq->pgo_throttled = TRUE;
5013
5014 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
5015
5016 vm_page_unlock_queues();
5017
5018 wait_result = thread_block(THREAD_CONTINUE_NULL);
5019
5020 vm_page_lock_queues();
5021
5022 if (wait_result != THREAD_TIMED_OUT)
5023 break;
5024 if (!VM_PAGE_Q_THROTTLED(tq))
5025 break;
5026
5027 if (hibernate_should_abort())
5028 retval = 1;
5029
5030 if (--wait_count == 0) {
5031
5032 hibernate_stats.hibernate_throttle_timeout++;
5033
5034 if (tq == eq) {
5035 hibernate_skip_external = TRUE;
5036 break;
5037 }
5038 retval = 1;
5039 }
5040 }
5041 if (retval)
5042 break;
5043
5044 hibernate_stats.hibernate_throttled++;
5045
5046 continue;
5047 }
5048 /*
5049 * we've already factored out pages in the laundry which
5050 * means this page can't be on the pageout queue so it's
5051 * safe to do the VM_PAGE_QUEUES_REMOVE
5052 */
5053 assert(!m->pageout_queue);
5054
5055 VM_PAGE_QUEUES_REMOVE(m);
5056
5057 if (COMPRESSED_PAGER_IS_ACTIVE)
5058 pmap_disconnect(m->phys_page);
5059
5060 vm_pageout_cluster(m, FALSE);
5061
5062 hibernate_stats.hibernate_found_dirty++;
5063
5064 goto next_pg;
5065
5066 reenter_pg_on_q:
5067 queue_remove(q, m, vm_page_t, pageq);
5068 queue_enter(q, m, vm_page_t, pageq);
5069
5070 hibernate_stats.hibernate_reentered_on_q++;
5071 next_pg:
5072 hibernate_stats.hibernate_considered++;
5073
5074 qcount--;
5075 try_failed_count = 0;
5076 }
5077 if (l_object != NULL) {
5078 vm_object_unlock(l_object);
5079 l_object = NULL;
5080 }
5081 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5082
5083 vm_page_unlock_queues();
5084
5085 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5086
5087 return (retval);
5088 }
5089
5090
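/*
 * Flush every queue that can hold dirty pageable pages: each speculative age
 * queue, then the inactive, anonymous and cleaned queues, draining the
 * internal pageout queue before and after flushing the active queue, and
 * finally draining the external pageout queue unless external pages are being
 * skipped.  A non-zero return from any stage aborts the pass.
 */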
5091 static int
5092 hibernate_flush_dirty_pages(int pass)
5093 {
5094 struct vm_speculative_age_q *aq;
5095 uint32_t i;
5096
5097 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5098
5099 if (vm_page_local_q) {
5100 for (i = 0; i < vm_page_local_q_count; i++)
5101 vm_page_reactivate_local(i, TRUE, FALSE);
5102 }
5103
5104 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5105 int qcount;
5106 vm_page_t m;
5107
5108 aq = &vm_page_queue_speculative[i];
5109
5110 if (queue_empty(&aq->age_q))
5111 continue;
5112 qcount = 0;
5113
5114 vm_page_lockspin_queues();
5115
5116 queue_iterate(&aq->age_q,
5117 m,
5118 vm_page_t,
5119 pageq)
5120 {
5121 qcount++;
5122 }
5123 vm_page_unlock_queues();
5124
5125 if (qcount) {
5126 if (hibernate_flush_queue(&aq->age_q, qcount))
5127 return (1);
5128 }
5129 }
5130 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
5131 return (1);
5132 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5133 return (1);
5134 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
5135 return (1);
5136 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5137 return (1);
5138
5139 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5140 vm_compressor_record_warmup_start();
5141
5142 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5143 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5144 vm_compressor_record_warmup_end();
5145 return (1);
5146 }
5147 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5148 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5149 vm_compressor_record_warmup_end();
5150 return (1);
5151 }
5152 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5153 vm_compressor_record_warmup_end();
5154
5155 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5156 return (1);
5157
5158 return (0);
5159 }
5160
5161
5162 int
5163 hibernate_flush_memory()
5164 {
5165 int retval;
5166
5167 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5168
5169 hibernate_cleaning_in_progress = TRUE;
5170 hibernate_skip_external = FALSE;
5171
5172 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5173
5174 if (COMPRESSED_PAGER_IS_ACTIVE) {
5175
5176 if ((retval = hibernate_flush_dirty_pages(2)) == 0) {
5177
5178 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5179
5180 vm_compressor_flush();
5181
5182 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5183 }
5184 }
5185 if (retval == 0 && consider_buffer_cache_collect != NULL) {
5186 unsigned int orig_wire_count;
5187
5188 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5189 orig_wire_count = vm_page_wire_count;
5190
5191 (void)(*consider_buffer_cache_collect)(1);
5192 consider_zone_gc(TRUE);
5193
5194 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5195
5196 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
5197 }
5198 }
5199 hibernate_cleaning_in_progress = FALSE;
5200
5201 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5202
5203 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5204 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5205
5206
5207 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5208 hibernate_stats.hibernate_considered,
5209 hibernate_stats.hibernate_reentered_on_q,
5210 hibernate_stats.hibernate_found_dirty);
5211 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
5212 hibernate_stats.hibernate_skipped_cleaning,
5213 hibernate_stats.hibernate_skipped_transient,
5214 hibernate_stats.hibernate_skipped_precious,
5215 hibernate_stats.hibernate_skipped_external,
5216 hibernate_stats.hibernate_queue_nolock);
5217 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5218 hibernate_stats.hibernate_queue_paused,
5219 hibernate_stats.hibernate_throttled,
5220 hibernate_stats.hibernate_throttle_timeout,
5221 hibernate_stats.hibernate_drained,
5222 hibernate_stats.hibernate_drain_timeout);
5223
5224 return (retval);
5225 }
5226
5227
5228 static void
5229 hibernate_page_list_zero(hibernate_page_list_t *list)
5230 {
5231 uint32_t bank;
5232 hibernate_bitmap_t * bitmap;
5233
5234 bitmap = &list->bank_bitmap[0];
5235 for (bank = 0; bank < list->bank_count; bank++)
5236 {
5237 uint32_t last_bit;
5238
5239 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5240 // set out-of-bound bits at end of bitmap.
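 			// e.g. a bank spanning 37 pages leaves last_bit == 5, so the
 			// 27 out-of-range bit positions in the final word are pre-set
 			// and can never read back as pages that need saving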
5241 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5242 if (last_bit)
5243 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
5244
5245 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5246 }
5247 }
5248
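/*
 * Grab up to gobble_count pages, waiting at most free_page_time milliseconds
 * in total for memory to become available, and park them on
 * hibernate_gobble_queue until hibernate_free_gobble_pages() frees them again.
 */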
5249 void
5250 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
5251 {
5252 uint32_t i;
5253 vm_page_t m;
5254 uint64_t start, end, timeout, nsec;
5255 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
5256 clock_get_uptime(&start);
5257
5258 for (i = 0; i < gobble_count; i++)
5259 {
5260 while (VM_PAGE_NULL == (m = vm_page_grab()))
5261 {
5262 clock_get_uptime(&end);
5263 if (end >= timeout)
5264 break;
5265 VM_PAGE_WAIT();
5266 }
5267 if (!m)
5268 break;
5269 m->busy = FALSE;
5270 vm_page_gobble(m);
5271
5272 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
5273 hibernate_gobble_queue = m;
5274 }
5275
5276 clock_get_uptime(&end);
5277 absolutetime_to_nanoseconds(end - start, &nsec);
5278 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
5279 }
5280
5281 void
5282 hibernate_free_gobble_pages(void)
5283 {
5284 vm_page_t m, next;
5285 uint32_t count = 0;
5286
5287 m = (vm_page_t) hibernate_gobble_queue;
5288 while(m)
5289 {
5290 next = (vm_page_t) m->pageq.next;
5291 vm_page_free(m);
5292 count++;
5293 m = next;
5294 }
5295 hibernate_gobble_queue = VM_PAGE_NULL;
5296
5297 if (count)
5298 HIBLOG("Freed %d pages\n", count);
5299 }
5300
5301 static boolean_t
5302 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
5303 {
5304 vm_object_t object = NULL;
5305 int refmod_state;
5306 boolean_t discard = FALSE;
5307
5308 do
5309 {
5310 if (m->private)
5311 panic("hibernate_consider_discard: private");
5312
5313 if (!vm_object_lock_try(m->object)) {
5314 if (!preflight) hibernate_stats.cd_lock_failed++;
5315 break;
5316 }
5317 object = m->object;
5318
5319 if (VM_PAGE_WIRED(m)) {
5320 if (!preflight) hibernate_stats.cd_found_wired++;
5321 break;
5322 }
5323 if (m->precious) {
5324 if (!preflight) hibernate_stats.cd_found_precious++;
5325 break;
5326 }
5327 if (m->busy || !object->alive) {
5328 /*
5329 * Somebody is playing with this page.
5330 */
5331 if (!preflight) hibernate_stats.cd_found_busy++;
5332 break;
5333 }
5334 if (m->absent || m->unusual || m->error) {
5335 /*
5336 			 * If it's unusual in any way, ignore it
5337 */
5338 if (!preflight) hibernate_stats.cd_found_unusual++;
5339 break;
5340 }
5341 if (m->cleaning) {
5342 if (!preflight) hibernate_stats.cd_found_cleaning++;
5343 break;
5344 }
5345 if (m->laundry) {
5346 if (!preflight) hibernate_stats.cd_found_laundry++;
5347 break;
5348 }
5349 if (!m->dirty)
5350 {
5351 refmod_state = pmap_get_refmod(m->phys_page);
5352
5353 if (refmod_state & VM_MEM_REFERENCED)
5354 m->reference = TRUE;
5355 if (refmod_state & VM_MEM_MODIFIED) {
5356 SET_PAGE_DIRTY(m, FALSE);
5357 }
5358 }
5359
5360 /*
5361 * If it's clean or purgeable we can discard the page on wakeup.
5362 */
5363 discard = (!m->dirty)
5364 || (VM_PURGABLE_VOLATILE == object->purgable)
5365 || (VM_PURGABLE_EMPTY == object->purgable);
5366
5367
5368 if (discard == FALSE) {
5369 if (!preflight)
5370 hibernate_stats.cd_found_dirty++;
5371 } else if (m->xpmapped && m->reference && !object->internal) {
5372 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
5373 if (!preflight)
5374 hibernate_stats.cd_found_xpmapped++;
5375 discard = FALSE;
5376 } else {
5377 if (!preflight)
5378 hibernate_stats.cd_skipped_xpmapped++;
5379 }
5380 }
5381 }
5382 while (FALSE);
5383
5384 if (object)
5385 vm_object_unlock(object);
5386
5387 return (discard);
5388 }
5389
5390
5391 static void
5392 hibernate_discard_page(vm_page_t m)
5393 {
5394 if (m->absent || m->unusual || m->error)
5395 /*
5396 	 * If it's unusual in any way, ignore it
5397 */
5398 return;
5399
5400 #if DEBUG
5401 vm_object_t object = m->object;
5402 if (!vm_object_lock_try(m->object))
5403 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5404 #else
5405 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5406 makes sure these locks are uncontended before sleep */
5407 #endif /* !DEBUG */
5408
5409 if (m->pmapped == TRUE)
5410 {
5411 __unused int refmod_state = pmap_disconnect(m->phys_page);
5412 }
5413
5414 if (m->laundry)
5415 panic("hibernate_discard_page(%p) laundry", m);
5416 if (m->private)
5417 panic("hibernate_discard_page(%p) private", m);
5418 if (m->fictitious)
5419 panic("hibernate_discard_page(%p) fictitious", m);
5420
5421 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5422 {
5423 /* object should be on a queue */
5424 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5425 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5426 assert(old_queue);
5427 if (m->object->purgeable_when_ripe) {
5428 vm_purgeable_token_delete_first(old_queue);
5429 }
5430 m->object->purgable = VM_PURGABLE_EMPTY;
5431 }
5432
5433 vm_page_free(m);
5434
5435 #if DEBUG
5436 vm_object_unlock(object);
5437 #endif /* DEBUG */
5438 }
5439
5440 /*
5441 Grab locks for hibernate_page_list_setall()
5442 */
5443 void
5444 hibernate_vm_lock_queues(void)
5445 {
5446 vm_object_lock(compressor_object);
5447 vm_page_lock_queues();
5448 lck_mtx_lock(&vm_page_queue_free_lock);
5449
5450 if (vm_page_local_q) {
5451 uint32_t i;
5452 for (i = 0; i < vm_page_local_q_count; i++) {
5453 struct vpl *lq;
5454 lq = &vm_page_local_q[i].vpl_un.vpl;
5455 VPL_LOCK(&lq->vpl_lock);
5456 }
5457 }
5458 }
5459
5460 void
5461 hibernate_vm_unlock_queues(void)
5462 {
5463 if (vm_page_local_q) {
5464 uint32_t i;
5465 for (i = 0; i < vm_page_local_q_count; i++) {
5466 struct vpl *lq;
5467 lq = &vm_page_local_q[i].vpl_un.vpl;
5468 VPL_UNLOCK(&lq->vpl_lock);
5469 }
5470 }
5471 lck_mtx_unlock(&vm_page_queue_free_lock);
5472 vm_page_unlock_queues();
5473 vm_object_unlock(compressor_object);
5474 }
5475
5476 /*
5477 Bits zero in the bitmaps => page needs to be saved. All pages default to being saved;
5478 pages the VM knows do not need saving are subtracted.
5479 Wired pages to be saved are present in page_list_wired, pageable pages in page_list.
5480 */
5481
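/*
 * Accounting note: count_wire starts at the full page count and is
 * decremented for every page found on a free, pageable or compressor queue;
 * whatever remains at the end is treated as wired and reported via
 * hibernate_stats.cd_count_wire.
 */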
5482 void
5483 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5484 hibernate_page_list_t * page_list_wired,
5485 hibernate_page_list_t * page_list_pal,
5486 boolean_t preflight,
5487 boolean_t will_discard,
5488 uint32_t * pagesOut)
5489 {
5490 uint64_t start, end, nsec;
5491 vm_page_t m;
5492 vm_page_t next;
5493 uint32_t pages = page_list->page_count;
5494 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
5495 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5496 uint32_t count_wire = pages;
5497 uint32_t count_discard_active = 0;
5498 uint32_t count_discard_inactive = 0;
5499 uint32_t count_discard_cleaned = 0;
5500 uint32_t count_discard_purgeable = 0;
5501 uint32_t count_discard_speculative = 0;
5502 uint32_t count_discard_vm_struct_pages = 0;
5503 uint32_t i;
5504 uint32_t bank;
5505 hibernate_bitmap_t * bitmap;
5506 hibernate_bitmap_t * bitmap_wired;
5507 boolean_t discard_all;
5508 boolean_t discard;
5509
5510 HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
5511
5512 if (preflight) {
5513 page_list = NULL;
5514 page_list_wired = NULL;
5515 page_list_pal = NULL;
5516 discard_all = FALSE;
5517 } else {
5518 discard_all = will_discard;
5519 }
5520
5521 #if DEBUG
5522 if (!preflight)
5523 {
5524 vm_page_lock_queues();
5525 if (vm_page_local_q) {
5526 for (i = 0; i < vm_page_local_q_count; i++) {
5527 struct vpl *lq;
5528 lq = &vm_page_local_q[i].vpl_un.vpl;
5529 VPL_LOCK(&lq->vpl_lock);
5530 }
5531 }
5532 }
5533 #endif /* DEBUG */
5534
5535
5536 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5537
5538 clock_get_uptime(&start);
5539
5540 if (!preflight) {
5541 hibernate_page_list_zero(page_list);
5542 hibernate_page_list_zero(page_list_wired);
5543 hibernate_page_list_zero(page_list_pal);
5544
5545 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5546 hibernate_stats.cd_pages = pages;
5547 }
5548
5549 if (vm_page_local_q) {
5550 for (i = 0; i < vm_page_local_q_count; i++)
5551 vm_page_reactivate_local(i, TRUE, !preflight);
5552 }
5553
5554 if (preflight) {
5555 vm_object_lock(compressor_object);
5556 vm_page_lock_queues();
5557 lck_mtx_lock(&vm_page_queue_free_lock);
5558 }
5559
5560 m = (vm_page_t) hibernate_gobble_queue;
5561 while (m)
5562 {
5563 pages--;
5564 count_wire--;
5565 if (!preflight) {
5566 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5567 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5568 }
5569 m = (vm_page_t) m->pageq.next;
5570 }
5571
5572 if (!preflight) for( i = 0; i < real_ncpus; i++ )
5573 {
5574 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5575 {
5576 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5577 {
5578 pages--;
5579 count_wire--;
5580 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5581 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5582
5583 hibernate_stats.cd_local_free++;
5584 hibernate_stats.cd_total_free++;
5585 }
5586 }
5587 }
5588
5589 for( i = 0; i < vm_colors; i++ )
5590 {
5591 queue_iterate(&vm_page_queue_free[i],
5592 m,
5593 vm_page_t,
5594 pageq)
5595 {
5596 pages--;
5597 count_wire--;
5598 if (!preflight) {
5599 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5600 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5601
5602 hibernate_stats.cd_total_free++;
5603 }
5604 }
5605 }
5606
5607 queue_iterate(&vm_lopage_queue_free,
5608 m,
5609 vm_page_t,
5610 pageq)
5611 {
5612 pages--;
5613 count_wire--;
5614 if (!preflight) {
5615 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5616 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5617
5618 hibernate_stats.cd_total_free++;
5619 }
5620 }
5621
5622 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5623 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
5624 {
5625 next = (vm_page_t) m->pageq.next;
5626 discard = FALSE;
5627 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5628 && hibernate_consider_discard(m, preflight))
5629 {
5630 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5631 count_discard_inactive++;
5632 discard = discard_all;
5633 }
5634 else
5635 count_throttled++;
5636 count_wire--;
5637 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5638
5639 if (discard) hibernate_discard_page(m);
5640 m = next;
5641 }
5642
5643 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5644 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5645 {
5646 next = (vm_page_t) m->pageq.next;
5647 discard = FALSE;
5648 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5649 && hibernate_consider_discard(m, preflight))
5650 {
5651 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5652 if (m->dirty)
5653 count_discard_purgeable++;
5654 else
5655 count_discard_inactive++;
5656 discard = discard_all;
5657 }
5658 else
5659 count_anonymous++;
5660 count_wire--;
5661 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5662 if (discard) hibernate_discard_page(m);
5663 m = next;
5664 }
5665
5666 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5667 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5668 {
5669 next = (vm_page_t) m->pageq.next;
5670 discard = FALSE;
5671 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5672 && hibernate_consider_discard(m, preflight))
5673 {
5674 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5675 if (m->dirty)
5676 count_discard_purgeable++;
5677 else
5678 count_discard_cleaned++;
5679 discard = discard_all;
5680 }
5681 else
5682 count_cleaned++;
5683 count_wire--;
5684 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5685 if (discard) hibernate_discard_page(m);
5686 m = next;
5687 }
5688
5689 m = (vm_page_t) queue_first(&vm_page_queue_active);
5690 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5691 {
5692 next = (vm_page_t) m->pageq.next;
5693 discard = FALSE;
5694 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5695 && hibernate_consider_discard(m, preflight))
5696 {
5697 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5698 if (m->dirty)
5699 count_discard_purgeable++;
5700 else
5701 count_discard_active++;
5702 discard = discard_all;
5703 }
5704 else
5705 count_active++;
5706 count_wire--;
5707 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5708 if (discard) hibernate_discard_page(m);
5709 m = next;
5710 }
5711
5712 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5713 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5714 {
5715 next = (vm_page_t) m->pageq.next;
5716 discard = FALSE;
5717 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5718 && hibernate_consider_discard(m, preflight))
5719 {
5720 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5721 if (m->dirty)
5722 count_discard_purgeable++;
5723 else
5724 count_discard_inactive++;
5725 discard = discard_all;
5726 }
5727 else
5728 count_inactive++;
5729 count_wire--;
5730 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5731 if (discard) hibernate_discard_page(m);
5732 m = next;
5733 }
5734
5735 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5736 {
5737 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5738 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5739 {
5740 next = (vm_page_t) m->pageq.next;
5741 discard = FALSE;
5742 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5743 && hibernate_consider_discard(m, preflight))
5744 {
5745 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5746 count_discard_speculative++;
5747 discard = discard_all;
5748 }
5749 else
5750 count_speculative++;
5751 count_wire--;
5752 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5753 if (discard) hibernate_discard_page(m);
5754 m = next;
5755 }
5756 }
5757
5758 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
5759 {
5760 count_compressor++;
5761 count_wire--;
5762 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5763 }
5764
5765 if (preflight == FALSE && discard_all == TRUE) {
5766 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5767
5768 HIBLOG("hibernate_teardown started\n");
5769 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
5770 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
5771
5772 pages -= count_discard_vm_struct_pages;
5773 count_wire -= count_discard_vm_struct_pages;
5774
5775 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
5776
5777 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
5778 }
5779
5780 if (!preflight) {
5781 // pull wired from hibernate_bitmap
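 	// a zero bit means "save this page"; OR-ing in the complement of the
 	// wired bitmap marks every page the wired list will already save as
 	// "don't save" here, so nothing is written out twice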
5782 bitmap = &page_list->bank_bitmap[0];
5783 bitmap_wired = &page_list_wired->bank_bitmap[0];
5784 for (bank = 0; bank < page_list->bank_count; bank++)
5785 {
5786 for (i = 0; i < bitmap->bitmapwords; i++)
5787 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5788 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5789 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5790 }
5791 }
5792
5793 // machine dependent adjustments
5794 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
5795
5796 if (!preflight) {
5797 hibernate_stats.cd_count_wire = count_wire;
5798 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
5799 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
5800 }
5801
5802 clock_get_uptime(&end);
5803 absolutetime_to_nanoseconds(end - start, &nsec);
5804 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5805
5806 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
5807 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
5808 discard_all ? "did" : "could",
5809 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5810
5811 if (hibernate_stats.cd_skipped_xpmapped)
5812 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
5813
5814 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
5815
5816 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
5817
5818 #if DEBUG
5819 if (!preflight)
5820 {
5821 if (vm_page_local_q) {
5822 for (i = 0; i < vm_page_local_q_count; i++) {
5823 struct vpl *lq;
5824 lq = &vm_page_local_q[i].vpl_un.vpl;
5825 VPL_UNLOCK(&lq->vpl_lock);
5826 }
5827 }
5828 vm_page_unlock_queues();
5829 }
5830 #endif /* DEBUG */
5831
5832 if (preflight) {
5833 lck_mtx_unlock(&vm_page_queue_free_lock);
5834 vm_page_unlock_queues();
5835 vm_object_unlock(compressor_object);
5836 }
5837
5838 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5839 }
5840
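/*
* Walk the anonymous, speculative, inactive, active and cleaned
* queues and discard every page whose bit is set in page_list,
* keeping per-queue counts of what was discarded.
*/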
5841 void
5842 hibernate_page_list_discard(hibernate_page_list_t * page_list)
5843 {
5844 uint64_t start, end, nsec;
5845 vm_page_t m;
5846 vm_page_t next;
5847 uint32_t i;
5848 uint32_t count_discard_active = 0;
5849 uint32_t count_discard_inactive = 0;
5850 uint32_t count_discard_purgeable = 0;
5851 uint32_t count_discard_cleaned = 0;
5852 uint32_t count_discard_speculative = 0;
5853
5854
5855 #if DEBUG
5856 vm_page_lock_queues();
5857 if (vm_page_local_q) {
5858 for (i = 0; i < vm_page_local_q_count; i++) {
5859 struct vpl *lq;
5860 lq = &vm_page_local_q[i].vpl_un.vpl;
5861 VPL_LOCK(&lq->vpl_lock);
5862 }
5863 }
5864 #endif /* DEBUG */
5865
5866 clock_get_uptime(&start);
5867
5868 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5869 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5870 {
5871 next = (vm_page_t) m->pageq.next;
5872 if (hibernate_page_bittst(page_list, m->phys_page))
5873 {
5874 if (m->dirty)
5875 count_discard_purgeable++;
5876 else
5877 count_discard_inactive++;
5878 hibernate_discard_page(m);
5879 }
5880 m = next;
5881 }
5882
5883 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5884 {
5885 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5886 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5887 {
5888 next = (vm_page_t) m->pageq.next;
5889 if (hibernate_page_bittst(page_list, m->phys_page))
5890 {
5891 count_discard_speculative++;
5892 hibernate_discard_page(m);
5893 }
5894 m = next;
5895 }
5896 }
5897
5898 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5899 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5900 {
5901 next = (vm_page_t) m->pageq.next;
5902 if (hibernate_page_bittst(page_list, m->phys_page))
5903 {
5904 if (m->dirty)
5905 count_discard_purgeable++;
5906 else
5907 count_discard_inactive++;
5908 hibernate_discard_page(m);
5909 }
5910 m = next;
5911 }
5912
5913 m = (vm_page_t) queue_first(&vm_page_queue_active);
5914 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5915 {
5916 next = (vm_page_t) m->pageq.next;
5917 if (hibernate_page_bittst(page_list, m->phys_page))
5918 {
5919 if (m->dirty)
5920 count_discard_purgeable++;
5921 else
5922 count_discard_active++;
5923 hibernate_discard_page(m);
5924 }
5925 m = next;
5926 }
5927
5928 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5929 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5930 {
5931 next = (vm_page_t) m->pageq.next;
5932 if (hibernate_page_bittst(page_list, m->phys_page))
5933 {
5934 if (m->dirty)
5935 count_discard_purgeable++;
5936 else
5937 count_discard_cleaned++;
5938 hibernate_discard_page(m);
5939 }
5940 m = next;
5941 }
5942
5943 #if DEBUG
5944 if (vm_page_local_q) {
5945 for (i = 0; i < vm_page_local_q_count; i++) {
5946 struct vpl *lq;
5947 lq = &vm_page_local_q[i].vpl_un.vpl;
5948 VPL_UNLOCK(&lq->vpl_lock);
5949 }
5950 }
5951 vm_page_unlock_queues();
5952 #endif /* DEBUG */
5953
5954 clock_get_uptime(&end);
5955 absolutetime_to_nanoseconds(end - start, &nsec);
5956 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
5957 nsec / 1000000ULL,
5958 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5959 }
5960
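/*
* State shared between hibernate_teardown_vm_structs() and
* hibernate_rebuild_vm_structs().
*/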
5961 boolean_t hibernate_paddr_map_inited = FALSE;
5962 boolean_t hibernate_rebuild_needed = FALSE;
5963 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
5964 vm_page_t hibernate_rebuild_hash_list = NULL;
5965
5966 unsigned int hibernate_teardown_found_tabled_pages = 0;
5967 unsigned int hibernate_teardown_found_created_pages = 0;
5968 unsigned int hibernate_teardown_found_free_pages = 0;
5969 unsigned int hibernate_teardown_vm_page_free_count;
5970
5971
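/*
* A ppnum_mapping describes one run of the vm_pages[] array whose
* entries [ppnm_sindx, ppnm_eindx) have consecutive physical page
* numbers starting at ppnm_base_paddr.  hibernate_free_range() uses
* hibernate_lookup_paddr() to recover a page's physical address from
* its index after hibernate_teardown_vm_structs() has compacted the
* array.
*/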
5972 struct ppnum_mapping {
5973 struct ppnum_mapping *ppnm_next;
5974 ppnum_t ppnm_base_paddr;
5975 unsigned int ppnm_sindx;
5976 unsigned int ppnm_eindx;
5977 };
5978
5979 struct ppnum_mapping *ppnm_head;
5980 struct ppnum_mapping *ppnm_last_found = NULL;
5981
5982
5983 void
5984 hibernate_create_paddr_map()
5985 {
5986 unsigned int i;
5987 ppnum_t next_ppnum_in_run = 0;
5988 struct ppnum_mapping *ppnm = NULL;
5989
5990 if (hibernate_paddr_map_inited == FALSE) {
5991
5992 for (i = 0; i < vm_pages_count; i++) {
5993
5994 if (ppnm)
5995 ppnm->ppnm_eindx = i;
5996
5997 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
5998
5999 ppnm = kalloc(sizeof(struct ppnum_mapping));
6000
6001 ppnm->ppnm_next = ppnm_head;
6002 ppnm_head = ppnm;
6003
6004 ppnm->ppnm_sindx = i;
6005 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
6006 }
6007 next_ppnum_in_run = vm_pages[i].phys_page + 1;
6008 }
6009 ppnm->ppnm_eindx++;
6010
6011 hibernate_paddr_map_inited = TRUE;
6012 }
6013 }
6014
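/*
* Translate a vm_pages[] index back to its physical page number by
* searching the ppnum_mapping runs, caching the last run that matched.
*
* Illustrative (made-up) layout: with two runs
*   { ppnm_sindx 0,   ppnm_eindx 100, ppnm_base_paddr 0x100 }
*   { ppnm_sindx 100, ppnm_eindx 164, ppnm_base_paddr 0x800 }
* hibernate_lookup_paddr(102) returns 0x800 + (102 - 100) = 0x802.
*/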
6015 ppnum_t
6016 hibernate_lookup_paddr(unsigned int indx)
6017 {
6018 struct ppnum_mapping *ppnm = NULL;
6019
6020 ppnm = ppnm_last_found;
6021
6022 if (ppnm) {
6023 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
6024 goto done;
6025 }
6026 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
6027
6028 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
6029 ppnm_last_found = ppnm;
6030 break;
6031 }
6032 }
6033 if (ppnm == NULL)
6034 panic("hibernate_lookup_paddr of %d failed\n", indx);
6035 done:
6036 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
6037 }
6038
6039
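/*
* Set the bits for every page fully contained in the kernel virtual
* range [saddr, eaddr) in both page_list and page_list_wired, and
* return the number of pages marked; used for VM structures that do
* not need to be preserved across hibernation.
*/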
6040 uint32_t
6041 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6042 {
6043 addr64_t saddr_aligned;
6044 addr64_t eaddr_aligned;
6045 addr64_t addr;
6046 ppnum_t paddr;
6047 unsigned int mark_as_unneeded_pages = 0;
6048
6049 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
6050 eaddr_aligned = eaddr & ~PAGE_MASK_64;
6051
6052 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
6053
6054 paddr = pmap_find_phys(kernel_pmap, addr);
6055
6056 assert(paddr);
6057
6058 hibernate_page_bitset(page_list, TRUE, paddr);
6059 hibernate_page_bitset(page_list_wired, TRUE, paddr);
6060
6061 mark_as_unneeded_pages++;
6062 }
6063 return (mark_as_unneeded_pages);
6064 }
6065
6066
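/*
* Re-link an already-hashed page onto the head of its object/offset
* hash bucket; used by hibernate_rebuild_vm_structs().
*/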
6067 void
6068 hibernate_hash_insert_page(vm_page_t mem)
6069 {
6070 vm_page_bucket_t *bucket;
6071 int hash_id;
6072
6073 assert(mem->hashed);
6074 assert(mem->object);
6075 assert(mem->offset != (vm_object_offset_t) -1);
6076
6077 /*
6078 * Insert it into the object/offset hash table
6079 */
6080 hash_id = vm_page_hash(mem->object, mem->offset);
6081 bucket = &vm_page_buckets[hash_id];
6082
6083 mem->next = bucket->pages;
6084 bucket->pages = mem;
6085 }
6086
6087
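/*
* Reinitialize vm_pages[sindx..eindx) as free pages and put them back
* on the per-color free queues, updating vm_page_free_count.
*/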
6088 void
6089 hibernate_free_range(int sindx, int eindx)
6090 {
6091 vm_page_t mem;
6092 unsigned int color;
6093
6094 while (sindx < eindx) {
6095 mem = &vm_pages[sindx];
6096
6097 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6098
6099 mem->lopage = FALSE;
6100 mem->free = TRUE;
6101
6102 color = mem->phys_page & vm_color_mask;
6103 queue_enter_first(&vm_page_queue_free[color],
6104 mem,
6105 vm_page_t,
6106 pageq);
6107 vm_page_free_count++;
6108
6109 sindx++;
6110 }
6111 }
6112
6113
6114 extern void hibernate_rebuild_pmap_structs(void);
6115
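/*
* Undo the compaction done by hibernate_teardown_vm_structs():
* copy each relocated vm_page_t back to the slot recorded in its
* "next" field, re-enter hashed pages into the bucket table, and
* rebuild the free queues from the holes in between.
*/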
6116 void
6117 hibernate_rebuild_vm_structs(void)
6118 {
6119 int cindx, sindx, eindx;
6120 vm_page_t mem, tmem, mem_next;
6121 AbsoluteTime startTime, endTime;
6122 uint64_t nsec;
6123
6124 if (hibernate_rebuild_needed == FALSE)
6125 return;
6126
6127 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6128 HIBLOG("hibernate_rebuild started\n");
6129
6130 clock_get_uptime(&startTime);
6131
6132 hibernate_rebuild_pmap_structs();
6133
6134 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6135 eindx = vm_pages_count;
6136
6137 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6138
6139 mem = &vm_pages[cindx];
6140 /*
6141 * hibernate_teardown_vm_structs leaves, in "next", the
6142 * location where this vm_page_t must end up.
6143 */
6144 tmem = mem->next;
6145 mem->next = NULL;
6146
6147 sindx = (int)(tmem - &vm_pages[0]);
6148
6149 if (mem != tmem) {
6150 /*
6151 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6152 * so move it back to its real location
6153 */
6154 *tmem = *mem;
6155 mem = tmem;
6156 }
6157 if (mem->hashed)
6158 hibernate_hash_insert_page(mem);
6159 /*
6160 * the 'hole' between this vm_page_t and the previous
6161 * vm_page_t we moved needs to be initialized as
6162 * a range of free vm_page_t's
6163 */
6164 hibernate_free_range(sindx + 1, eindx);
6165
6166 eindx = sindx;
6167 }
6168 if (sindx)
6169 hibernate_free_range(0, sindx);
6170
6171 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6172
6173 /*
6174 * process the list of vm_page_t's that were entered in the hash,
6175 * but were not located in the vm_pages array... these are
6176 * vm_page_t's that were created on the fly (i.e. fictitious)
6177 */
6178 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6179 mem_next = mem->next;
6180
6181 mem->next = NULL;
6182 hibernate_hash_insert_page(mem);
6183 }
6184 hibernate_rebuild_hash_list = NULL;
6185
6186 clock_get_uptime(&endTime);
6187 SUB_ABSOLUTETIME(&endTime, &startTime);
6188 absolutetime_to_nanoseconds(endTime, &nsec);
6189
6190 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6191
6192 hibernate_rebuild_needed = FALSE;
6193
6194 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6195 }
6196
6197
6198 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6199
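/*
* Shrink the VM's own bookkeeping ahead of writing the hibernation
* image: pull free pages off the free queues, compact the in-use
* vm_page_t's toward the front of vm_pages[] (remembering each
* page's original slot in its "next" field), and mark the hash
* buckets, the unused tail of vm_pages[] and any range reported by
* hibernate_teardown_pmap_structs() as unneeded.
*/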
6200 uint32_t
6201 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6202 {
6203 unsigned int i;
6204 unsigned int compact_target_indx;
6205 vm_page_t mem, mem_next;
6206 vm_page_bucket_t *bucket;
6207 unsigned int mark_as_unneeded_pages = 0;
6208 unsigned int unneeded_vm_page_bucket_pages = 0;
6209 unsigned int unneeded_vm_pages_pages = 0;
6210 unsigned int unneeded_pmap_pages = 0;
6211 addr64_t start_of_unneeded = 0;
6212 addr64_t end_of_unneeded = 0;
6213
6214
6215 if (hibernate_should_abort())
6216 return (0);
6217
6218 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6219 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6220 vm_page_cleaned_count, compressor_object->resident_page_count);
6221
6222 for (i = 0; i < vm_page_bucket_count; i++) {
6223
6224 bucket = &vm_page_buckets[i];
6225
6226 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem_next) {
6227 assert(mem->hashed);
6228
6229 mem_next = mem->next;
6230
6231 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6232 mem->next = hibernate_rebuild_hash_list;
6233 hibernate_rebuild_hash_list = mem;
6234 }
6235 }
6236 }
6237 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6238 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6239
6240 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6241
6242 compact_target_indx = 0;
6243
6244 for (i = 0; i < vm_pages_count; i++) {
6245
6246 mem = &vm_pages[i];
6247
6248 if (mem->free) {
6249 unsigned int color;
6250
6251 assert(mem->busy);
6252 assert(!mem->lopage);
6253
6254 color = mem->phys_page & vm_color_mask;
6255
6256 queue_remove(&vm_page_queue_free[color],
6257 mem,
6258 vm_page_t,
6259 pageq);
6260 mem->pageq.next = NULL;
6261 mem->pageq.prev = NULL;
6262
6263 vm_page_free_count--;
6264
6265 hibernate_teardown_found_free_pages++;
6266
6267 if ( !vm_pages[compact_target_indx].free)
6268 compact_target_indx = i;
6269 } else {
6270 /*
6271 * record this vm_page_t's original location;
6272 * we need this even if it doesn't get moved,
6273 * as an indicator to the rebuild function that
6274 * it does not have to be moved back
6275 */
6276 mem->next = mem;
6277
6278 if (vm_pages[compact_target_indx].free) {
6279 /*
6280 * we've got a hole to fill, so
6281 * move this vm_page_t to its new home
6282 */
6283 vm_pages[compact_target_indx] = *mem;
6284 mem->free = TRUE;
6285
6286 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6287 compact_target_indx++;
6288 } else
6289 hibernate_teardown_last_valid_compact_indx = i;
6290 }
6291 }
6292 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6293 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6294 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6295
6296 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6297
6298 if (start_of_unneeded) {
6299 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6300 mark_as_unneeded_pages += unneeded_pmap_pages;
6301 }
6302 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6303
6304 hibernate_rebuild_needed = TRUE;
6305
6306 return (mark_as_unneeded_pages);
6307 }
6308
6309
6310 #endif /* HIBERNATION */
6311
6312 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6313
6314 #include <mach_vm_debug.h>
6315 #if MACH_VM_DEBUG
6316
6317 #include <mach_debug/hash_info.h>
6318 #include <vm/vm_debug.h>
6319
6320 /*
6321 * Routine: vm_page_info
6322 * Purpose:
6323 * Return information about the global VP table.
6324 * Fills the buffer with as much information as possible
6325 * and returns the desired size of the buffer.
6326 * Conditions:
6327 * Nothing locked. The caller should provide
6328 * possibly-pageable memory.
6329 */
6330
6331 unsigned int
6332 vm_page_info(
6333 hash_info_bucket_t *info,
6334 unsigned int count)
6335 {
6336 unsigned int i;
6337 lck_spin_t *bucket_lock;
6338
6339 if (vm_page_bucket_count < count)
6340 count = vm_page_bucket_count;
6341
6342 for (i = 0; i < count; i++) {
6343 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6344 unsigned int bucket_count = 0;
6345 vm_page_t m;
6346
6347 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6348 lck_spin_lock(bucket_lock);
6349
6350 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
6351 bucket_count++;
6352
6353 lck_spin_unlock(bucket_lock);
6354
6355 /* don't touch pageable memory while holding locks */
6356 info[i].hib_count = bucket_count;
6357 }
6358
6359 return vm_page_bucket_count;
6360 }
6361 #endif /* MACH_VM_DEBUG */
6362
6363 #if VM_PAGE_BUCKETS_CHECK
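/*
* Sanity-check the page hash: panic if a hibernate rebuild is still
* pending, if the fake-bucket guard pattern has been overwritten, or
* if any bucket holds a page that is unhashed or hashed to the wrong
* bucket.
*/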
6364 void
6365 vm_page_buckets_check(void)
6366 {
6367 unsigned int i;
6368 vm_page_t p;
6369 unsigned int p_hash;
6370 vm_page_bucket_t *bucket;
6371 lck_spin_t *bucket_lock;
6372
6373 if (!vm_page_buckets_check_ready) {
6374 return;
6375 }
6376
6377 #if HIBERNATION
6378 if (hibernate_rebuild_needed ||
6379 hibernate_rebuild_hash_list) {
6380 panic("BUCKET_CHECK: hibernation in progress: "
6381 "rebuild_needed=%d rebuild_hash_list=%p\n",
6382 hibernate_rebuild_needed,
6383 hibernate_rebuild_hash_list);
6384 }
6385 #endif /* HIBERNATION */
6386
6387 #if VM_PAGE_FAKE_BUCKETS
6388 char *cp;
6389 for (cp = (char *) vm_page_fake_buckets_start;
6390 cp < (char *) vm_page_fake_buckets_end;
6391 cp++) {
6392 if (*cp != 0x5a) {
6393 panic("BUCKET_CHECK: corruption at %p in fake buckets "
6394 "[0x%llx:0x%llx]\n",
6395 cp,
6396 vm_page_fake_buckets_start,
6397 vm_page_fake_buckets_end);
6398 }
6399 }
6400 #endif /* VM_PAGE_FAKE_BUCKETS */
6401
6402 for (i = 0; i < vm_page_bucket_count; i++) {
6403 bucket = &vm_page_buckets[i];
6404 if (bucket->pages == VM_PAGE_NULL) {
6405 continue;
6406 }
6407
6408 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6409 lck_spin_lock(bucket_lock);
6410 p = bucket->pages;
6411 while (p != VM_PAGE_NULL) {
6412 p_hash = vm_page_hash(p->object, p->offset); /* compute before use in the panic below */
6413 if (!p->hashed) {
6414 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
6415 "hash %d in bucket %d at %p "
6416 "is not hashed\n",
6417 p, p->object, p->offset,
6418 p_hash, i, bucket);
6419 }
6420 if (p_hash != i) {
6421 panic("BUCKET_CHECK: corruption in bucket %d "
6422 "at %p: page %p object %p offset 0x%llx "
6423 "hash %d\n",
6424 i, bucket, p, p->object, p->offset,
6425 p_hash);
6426 }
6427 p = p->next;
6428 }
6429 lck_spin_unlock(bucket_lock);
6430 }
6431
6432 // printf("BUCKET_CHECK: checked buckets\n");
6433 }
6434 #endif /* VM_PAGE_BUCKETS_CHECK */