[apple/xnu.git] / osfmk / vm / vm_resident.c @ commit 2bb8118c24f1a0686c775823881bd06b2b6455f6
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <vm/pmap.h>
80 #include <vm/vm_init.h>
81 #include <vm/vm_map.h>
82 #include <vm/vm_page.h>
83 #include <vm/vm_pageout.h>
84 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
85 #include <kern/misc_protos.h>
86 #include <zone_debug.h>
87 #include <vm/cpm.h>
88 #include <pexpert/pexpert.h>
89
90 #include <vm/vm_protos.h>
91 #include <vm/memory_object.h>
92 #include <vm/vm_purgeable_internal.h>
93 #include <vm/vm_compressor.h>
94
95 #include <IOKit/IOHibernatePrivate.h>
96
97 #include <sys/kdebug.h>
98
99 boolean_t hibernate_cleaning_in_progress = FALSE;
100 boolean_t vm_page_free_verify = TRUE;
101
102 uint32_t vm_lopage_free_count = 0;
103 uint32_t vm_lopage_free_limit = 0;
104 uint32_t vm_lopage_lowater = 0;
105 boolean_t vm_lopage_refill = FALSE;
106 boolean_t vm_lopage_needed = FALSE;
107
108 lck_mtx_ext_t vm_page_queue_lock_ext;
109 lck_mtx_ext_t vm_page_queue_free_lock_ext;
110 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
111
112 int speculative_age_index = 0;
113 int speculative_steal_index = 0;
114 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
115
116
117 __private_extern__ void vm_page_init_lck_grp(void);
118
119 static void vm_page_free_prepare(vm_page_t page);
120 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
121
122
123
124
125 /*
126 * Associated with page of user-allocatable memory is a
127 * page structure.
128 */
129
130 /*
131 * These variables record the values returned by vm_page_bootstrap,
132 * for debugging purposes. The implementation of pmap_steal_memory
133 * and pmap_startup here also uses them internally.
134 */
135
136 vm_offset_t virtual_space_start;
137 vm_offset_t virtual_space_end;
138 uint32_t vm_page_pages;
139
140 /*
141 * The vm_page_lookup() routine, which provides for fast
142 * (virtual memory object, offset) to page lookup, employs
143 * the following hash table. The vm_page_{insert,remove}
144 * routines install and remove associations in the table.
145 * [This table is often called the virtual-to-physical,
146 * or VP, table.]
147 */
148 typedef struct {
149 vm_page_t pages;
150 #if MACH_PAGE_HASH_STATS
151 int cur_count; /* current count */
152 int hi_count; /* high water mark */
153 #endif /* MACH_PAGE_HASH_STATS */
154 } vm_page_bucket_t;
155
156
157 #define BUCKETS_PER_LOCK 16
158
159 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
160 unsigned int vm_page_bucket_count = 0; /* How big is array? */
161 unsigned int vm_page_hash_mask; /* Mask for hash function */
162 unsigned int vm_page_hash_shift; /* Shift for hash function */
163 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
164 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
165
166 lck_spin_t *vm_page_bucket_locks;
167
168
169 #if MACH_PAGE_HASH_STATS
170 /* This routine is only for debug. It is intended to be called by
171 * hand by a developer using a kernel debugger. This routine prints
172 * out vm_page_hash table statistics to the kernel debug console.
173 */
174 void
175 hash_debug(void)
176 {
177 int i;
178 int numbuckets = 0;
179 int highsum = 0;
180 int maxdepth = 0;
181
182 for (i = 0; i < vm_page_bucket_count; i++) {
183 if (vm_page_buckets[i].hi_count) {
184 numbuckets++;
185 highsum += vm_page_buckets[i].hi_count;
186 if (vm_page_buckets[i].hi_count > maxdepth)
187 maxdepth = vm_page_buckets[i].hi_count;
188 }
189 }
190 printf("Total number of buckets: %d\n", vm_page_bucket_count);
191 printf("Number used buckets: %d = %d%%\n",
192 numbuckets, 100*numbuckets/vm_page_bucket_count);
193 printf("Number unused buckets: %d = %d%%\n",
194 vm_page_bucket_count - numbuckets,
195 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
196 printf("Sum of bucket max depth: %d\n", highsum);
197 printf("Average bucket depth: %d.%2d\n",
198 highsum/vm_page_bucket_count,
199 highsum%vm_page_bucket_count);
200 printf("Maximum bucket depth: %d\n", maxdepth);
201 }
202 #endif /* MACH_PAGE_HASH_STATS */
203
204 /*
205 * The virtual page size is currently implemented as a runtime
206 * variable, but is constant once initialized using vm_set_page_size.
207 * This initialization must be done in the machine-dependent
208 * bootstrap sequence, before calling other machine-independent
209 * initializations.
210 *
211 * All references to the virtual page size outside this
212 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
213 * constants.
214 */
215 vm_size_t page_size = PAGE_SIZE;
216 vm_size_t page_mask = PAGE_MASK;
217 int page_shift = PAGE_SHIFT;
218
219 /*
220 * Resident page structures are initialized from
221 * a template (see vm_page_alloc).
222 *
223  *	When adding a new field to the resident page (vm_page)
224  *	structure, be sure to add its initialization
225 * (see vm_page_bootstrap).
226 */
227 struct vm_page vm_page_template;
228
229 vm_page_t vm_pages = VM_PAGE_NULL;
230 unsigned int vm_pages_count = 0;
231 ppnum_t vm_page_lowest = 0;
232
233 /*
234 * Resident pages that represent real memory
235 * are allocated from a set of free lists,
236 * one per color.
237 */
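/*
 * (A page's "color" is, roughly, the set of physical pages that
 * compete for the same cache lines; it is derived from the physical
 * page number masked with vm_color_mask.  Keeping one free list per
 * color lets the allocator rotate through colors so that consecutive
 * allocations are spread across the cache.)
 */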
238 unsigned int vm_colors;
239 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
240 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
241 queue_head_t vm_page_queue_free[MAX_COLORS];
242 unsigned int vm_page_free_wanted;
243 unsigned int vm_page_free_wanted_privileged;
244 unsigned int vm_page_free_count;
245 unsigned int vm_page_fictitious_count;
246
247 unsigned int vm_page_free_count_minimum; /* debugging */
248
249 /*
250 * Occasionally, the virtual memory system uses
251 * resident page structures that do not refer to
252 * real pages, for example to leave a page with
253 * important state information in the VP table.
254 *
255 * These page structures are allocated the way
256 * most other kernel structures are.
257 */
258 zone_t vm_page_zone;
259 vm_locks_array_t vm_page_locks;
260 decl_lck_mtx_data(,vm_page_alloc_lock)
261 lck_mtx_ext_t vm_page_alloc_lock_ext;
262
263 unsigned int io_throttle_zero_fill;
264
265 unsigned int vm_page_local_q_count = 0;
266 unsigned int vm_page_local_q_soft_limit = 250;
267 unsigned int vm_page_local_q_hard_limit = 500;
268 struct vplq *vm_page_local_q = NULL;
269
270 /* N.B. Guard and fictitious pages must not
271 * be assigned a zero phys_page value.
272 */
273 /*
274 * Fictitious pages don't have a physical address,
275 * but we must initialize phys_page to something.
276 * For debugging, this should be a strange value
277 * that the pmap module can recognize in assertions.
278 */
279 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
280
281 /*
282 * Guard pages are not accessible so they don't
283 * need a physical address, but we need to enter
284 * one in the pmap.
285 * Let's make it recognizable and make sure that
286 * we don't use a real physical page with that
287 * physical address.
288 */
289 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
290
291 /*
292 * Resident page structures are also chained on
293 * queues that are used by the page replacement
294 * system (pageout daemon). These queues are
295 * defined here, but are shared by the pageout
296 * module. The inactive queue is broken into
297  *	file-backed and anonymous queues for convenience, as the
298  *	pageout daemon often assigns a higher
299  *	importance to anonymous pages (they are less likely to be picked).
300 */
301 queue_head_t vm_page_queue_active;
302 queue_head_t vm_page_queue_inactive;
303 queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
304 queue_head_t vm_page_queue_throttled;
305
306 unsigned int vm_page_active_count;
307 unsigned int vm_page_inactive_count;
308 unsigned int vm_page_anonymous_count;
309 unsigned int vm_page_throttled_count;
310 unsigned int vm_page_speculative_count;
311 unsigned int vm_page_wire_count;
312 unsigned int vm_page_wire_count_initial;
313 unsigned int vm_page_gobble_count = 0;
314 unsigned int vm_page_wire_count_warning = 0;
315 unsigned int vm_page_gobble_count_warning = 0;
316
317 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
318 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
319 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
320
321 unsigned int vm_page_external_count = 0;
322 unsigned int vm_page_internal_count = 0;
323 unsigned int vm_page_pageable_external_count = 0;
324 unsigned int vm_page_pageable_internal_count = 0;
325
326 #if DEVELOPMENT || DEBUG
327 unsigned int vm_page_speculative_recreated = 0;
328 unsigned int vm_page_speculative_created = 0;
329 unsigned int vm_page_speculative_used = 0;
330 #endif
331
332 queue_head_t vm_page_queue_cleaned;
333
334 unsigned int vm_page_cleaned_count = 0;
335 unsigned int vm_pageout_enqueued_cleaned = 0;
336
337 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
338 ppnum_t max_valid_low_ppnum = 0xffffffff;
339
340
341 /*
342 * Several page replacement parameters are also
343 * shared with this module, so that page allocation
344 * (done here in vm_page_alloc) can trigger the
345 * pageout daemon.
346 */
347 unsigned int vm_page_free_target = 0;
348 unsigned int vm_page_free_min = 0;
349 unsigned int vm_page_throttle_limit = 0;
350 uint32_t vm_page_creation_throttle = 0;
351 unsigned int vm_page_inactive_target = 0;
352 unsigned int vm_page_anonymous_min = 0;
353 unsigned int vm_page_inactive_min = 0;
354 unsigned int vm_page_free_reserved = 0;
355 unsigned int vm_page_throttle_count = 0;
356
357
358 /*
359 * The VM system has a couple of heuristics for deciding
360 * that pages are "uninteresting" and should be placed
361 * on the inactive queue as likely candidates for replacement.
362 * These variables let the heuristics be controlled at run-time
363 * to make experimentation easier.
364 */
365
366 boolean_t vm_page_deactivate_hint = TRUE;
367
368 struct vm_page_stats_reusable vm_page_stats_reusable;
369
370 /*
371 * vm_set_page_size:
372 *
373 * Sets the page size, perhaps based upon the memory
374 * size. Must be called before any use of page-size
375 * dependent functions.
376 *
377 * Sets page_shift and page_mask from page_size.
378 */
379 void
380 vm_set_page_size(void)
381 {
382 page_mask = page_size - 1;
383
384 if ((page_mask & page_size) != 0)
385 panic("vm_set_page_size: page size not a power of two");
386
387 for (page_shift = 0; ; page_shift++)
388 if ((1U << page_shift) == page_size)
389 break;
390 }
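/*
 * For example, with PAGE_SIZE = 4096 this leaves page_mask = 0xFFF
 * and the loop settles on page_shift = 12, since 1 << 12 == 4096.
 */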
391
392
393 /*	Called once during startup, once the cache geometry is known.
394 */
395 static void
396 vm_page_set_colors( void )
397 {
398 unsigned int n, override;
399
400 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
401 n = override;
402 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
403 n = vm_cache_geometry_colors;
404 else n = DEFAULT_COLORS; /* use default if all else fails */
405
406 if ( n == 0 )
407 n = 1;
408 if ( n > MAX_COLORS )
409 n = MAX_COLORS;
410
411 /* the count must be a power of 2 */
412 if ( ( n & (n - 1)) != 0 )
413 panic("vm_page_set_colors");
414
415 vm_colors = n;
416 vm_color_mask = n - 1;
417 }
418
419
420 lck_grp_t vm_page_lck_grp_free;
421 lck_grp_t vm_page_lck_grp_queue;
422 lck_grp_t vm_page_lck_grp_local;
423 lck_grp_t vm_page_lck_grp_purge;
424 lck_grp_t vm_page_lck_grp_alloc;
425 lck_grp_t vm_page_lck_grp_bucket;
426 lck_grp_attr_t vm_page_lck_grp_attr;
427 lck_attr_t vm_page_lck_attr;
428
429
430 __private_extern__ void
431 vm_page_init_lck_grp(void)
432 {
433 /*
434 	 * initialize the vm_page lock world
435 */
436 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
437 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
438 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
439 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
440 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
441 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
442 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
443 lck_attr_setdefault(&vm_page_lck_attr);
444 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
445
446 vm_compressor_init_locks();
447 }
448
449 void
450 vm_page_init_local_q()
451 {
452 unsigned int num_cpus;
453 unsigned int i;
454 struct vplq *t_local_q;
455
456 num_cpus = ml_get_max_cpus();
457
458 /*
459 * no point in this for a uni-processor system
460 */
461 if (num_cpus >= 2) {
462 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
463
464 for (i = 0; i < num_cpus; i++) {
465 struct vpl *lq;
466
467 lq = &t_local_q[i].vpl_un.vpl;
468 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
469 queue_init(&lq->vpl_queue);
470 lq->vpl_count = 0;
471 lq->vpl_internal_count = 0;
472 lq->vpl_external_count = 0;
473 }
474 vm_page_local_q_count = num_cpus;
475
476 vm_page_local_q = (struct vplq *)t_local_q;
477 }
478 }
479
480
481 /*
482 * vm_page_bootstrap:
483 *
484 * Initializes the resident memory module.
485 *
486 * Allocates memory for the page cells, and
487 * for the object/offset-to-page hash table headers.
488 * Each page cell is initialized and placed on the free list.
489 * Returns the range of available kernel virtual memory.
490 */
491
492 void
493 vm_page_bootstrap(
494 vm_offset_t *startp,
495 vm_offset_t *endp)
496 {
497 register vm_page_t m;
498 unsigned int i;
499 unsigned int log1;
500 unsigned int log2;
501 unsigned int size;
502
503 /*
504 * Initialize the vm_page template.
505 */
506
507 m = &vm_page_template;
508 bzero(m, sizeof (*m));
509
510 m->pageq.next = NULL;
511 m->pageq.prev = NULL;
512 m->listq.next = NULL;
513 m->listq.prev = NULL;
514 m->next = VM_PAGE_NULL;
515
516 m->object = VM_OBJECT_NULL; /* reset later */
517 m->offset = (vm_object_offset_t) -1; /* reset later */
518
519 m->wire_count = 0;
520 m->local = FALSE;
521 m->inactive = FALSE;
522 m->active = FALSE;
523 m->pageout_queue = FALSE;
524 m->speculative = FALSE;
525 m->laundry = FALSE;
526 m->free = FALSE;
527 m->reference = FALSE;
528 m->gobbled = FALSE;
529 m->private = FALSE;
530 m->throttled = FALSE;
531 m->__unused_pageq_bits = 0;
532
533 m->phys_page = 0; /* reset later */
534
535 m->busy = TRUE;
536 m->wanted = FALSE;
537 m->tabled = FALSE;
538 m->fictitious = FALSE;
539 m->pmapped = FALSE;
540 m->wpmapped = FALSE;
541 m->pageout = FALSE;
542 m->absent = FALSE;
543 m->error = FALSE;
544 m->dirty = FALSE;
545 m->cleaning = FALSE;
546 m->precious = FALSE;
547 m->clustered = FALSE;
548 m->overwriting = FALSE;
549 m->restart = FALSE;
550 m->unusual = FALSE;
551 m->encrypted = FALSE;
552 m->encrypted_cleaning = FALSE;
553 m->cs_validated = FALSE;
554 m->cs_tainted = FALSE;
555 m->no_cache = FALSE;
556 m->reusable = FALSE;
557 m->slid = FALSE;
558 m->was_dirty = FALSE;
559 m->xpmapped = FALSE;
560 m->compressor = FALSE;
561 m->__unused_object_bits = 0;
562
563 /*
564 * Initialize the page queues.
565 */
566 vm_page_init_lck_grp();
567
568 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
569 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
570 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
571
572 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
573 int group;
574
575 purgeable_queues[i].token_q_head = 0;
576 purgeable_queues[i].token_q_tail = 0;
577 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
578 queue_init(&purgeable_queues[i].objq[group]);
579
580 purgeable_queues[i].type = i;
581 purgeable_queues[i].new_pages = 0;
582 #if MACH_ASSERT
583 purgeable_queues[i].debug_count_tokens = 0;
584 purgeable_queues[i].debug_count_objects = 0;
585 #endif
586 };
587
588 for (i = 0; i < MAX_COLORS; i++ )
589 queue_init(&vm_page_queue_free[i]);
590
591 queue_init(&vm_lopage_queue_free);
592 queue_init(&vm_page_queue_active);
593 queue_init(&vm_page_queue_inactive);
594 queue_init(&vm_page_queue_cleaned);
595 queue_init(&vm_page_queue_throttled);
596 queue_init(&vm_page_queue_anonymous);
597
598 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
599 queue_init(&vm_page_queue_speculative[i].age_q);
600
601 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
602 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
603 }
604 vm_page_free_wanted = 0;
605 vm_page_free_wanted_privileged = 0;
606
607 vm_page_set_colors();
608
609
610 /*
611 * Steal memory for the map and zone subsystems.
612 */
613 zone_steal_memory();
614 vm_map_steal_memory();
615
616 /*
617 * Allocate (and initialize) the virtual-to-physical
618 * table hash buckets.
619 *
620 * The number of buckets should be a power of two to
621 * get a good hash function. The following computation
622 	 *	chooses the first power of two that is greater than
623 	 *	or equal to the number of physical pages in the system.
624 */
625
626 if (vm_page_bucket_count == 0) {
627 unsigned int npages = pmap_free_pages();
628
629 vm_page_bucket_count = 1;
630 while (vm_page_bucket_count < npages)
631 vm_page_bucket_count <<= 1;
632 }
633 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
634
635 vm_page_hash_mask = vm_page_bucket_count - 1;
636
637 /*
638 * Calculate object shift value for hashing algorithm:
639 * O = log2(sizeof(struct vm_object))
640 * B = log2(vm_page_bucket_count)
641 * hash shifts the object left by
642 * B/2 - O
643 */
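/*
 * Illustrative numbers only (the structure size here is an assumption,
 * not the real sizeof): with 2^18 buckets the first loop below yields
 * log1 = 18, and a 256-byte struct vm_object would make the second
 * loop yield log2 = 8, giving vm_page_hash_shift = 18/2 - 8 + 1 = 2.
 */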
644 size = vm_page_bucket_count;
645 for (log1 = 0; size > 1; log1++)
646 size /= 2;
647 size = sizeof(struct vm_object);
648 for (log2 = 0; size > 1; log2++)
649 size /= 2;
650 vm_page_hash_shift = log1/2 - log2 + 1;
651
652 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
653 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
654 	vm_page_bucket_hash |= 1;				/* Set bit 0 - must always be 1 to ensure a unique series */
655
656 if (vm_page_hash_mask & vm_page_bucket_count)
657 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
658
659 vm_page_buckets = (vm_page_bucket_t *)
660 pmap_steal_memory(vm_page_bucket_count *
661 sizeof(vm_page_bucket_t));
662
663 vm_page_bucket_locks = (lck_spin_t *)
664 pmap_steal_memory(vm_page_bucket_lock_count *
665 sizeof(lck_spin_t));
666
667 for (i = 0; i < vm_page_bucket_count; i++) {
668 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
669
670 bucket->pages = VM_PAGE_NULL;
671 #if MACH_PAGE_HASH_STATS
672 bucket->cur_count = 0;
673 bucket->hi_count = 0;
674 #endif /* MACH_PAGE_HASH_STATS */
675 }
676
677 for (i = 0; i < vm_page_bucket_lock_count; i++)
678 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
679
680 /*
681 * Machine-dependent code allocates the resident page table.
682 * It uses vm_page_init to initialize the page frames.
683 * The code also returns to us the virtual space available
684 * to the kernel. We don't trust the pmap module
685 * to get the alignment right.
686 */
687
688 pmap_startup(&virtual_space_start, &virtual_space_end);
689 virtual_space_start = round_page(virtual_space_start);
690 virtual_space_end = trunc_page(virtual_space_end);
691
692 *startp = virtual_space_start;
693 *endp = virtual_space_end;
694
695 /*
696 * Compute the initial "wire" count.
697 * Up until now, the pages which have been set aside are not under
698 * the VM system's control, so although they aren't explicitly
699 * wired, they nonetheless can't be moved. At this moment,
700 * all VM managed pages are "free", courtesy of pmap_startup.
701 */
702 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
703 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
704 vm_page_wire_count_initial = vm_page_wire_count;
705 vm_page_free_count_minimum = vm_page_free_count;
706
707 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
708 vm_page_free_count, vm_page_wire_count);
709
710 simple_lock_init(&vm_paging_lock, 0);
711 }
712
713 #ifndef MACHINE_PAGES
714 /*
715 * We implement pmap_steal_memory and pmap_startup with the help
716 * of two simpler functions, pmap_virtual_space and pmap_next_page.
717 */
718
719 void *
720 pmap_steal_memory(
721 vm_size_t size)
722 {
723 vm_offset_t addr, vaddr;
724 ppnum_t phys_page;
725
726 /*
727 	 *	Round the requested size up to a multiple of the pointer size.
728 */
729
730 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
731
732 /*
733 * If this is the first call to pmap_steal_memory,
734 	 *	we have to initialize ourselves.
735 */
736
737 if (virtual_space_start == virtual_space_end) {
738 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
739
740 /*
741 * The initial values must be aligned properly, and
742 * we don't trust the pmap module to do it right.
743 */
744
745 virtual_space_start = round_page(virtual_space_start);
746 virtual_space_end = trunc_page(virtual_space_end);
747 }
748
749 /*
750 * Allocate virtual memory for this request.
751 */
752
753 addr = virtual_space_start;
754 virtual_space_start += size;
755
756 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
757
758 /*
759 * Allocate and map physical pages to back new virtual pages.
760 */
761
762 for (vaddr = round_page(addr);
763 vaddr < addr + size;
764 vaddr += PAGE_SIZE) {
765
766 if (!pmap_next_page_hi(&phys_page))
767 panic("pmap_steal_memory");
768
769 /*
770 * XXX Logically, these mappings should be wired,
771 * but some pmap modules barf if they are.
772 */
773 #if defined(__LP64__)
774 pmap_pre_expand(kernel_pmap, vaddr);
775 #endif
776
777 pmap_enter(kernel_pmap, vaddr, phys_page,
778 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
779 VM_WIMG_USE_DEFAULT, FALSE);
780 /*
781 * Account for newly stolen memory
782 */
783 vm_page_wire_count++;
784
785 }
786
787 return (void *) addr;
788 }
789
790 void
791 pmap_startup(
792 vm_offset_t *startp,
793 vm_offset_t *endp)
794 {
795 unsigned int i, npages, pages_initialized, fill, fillval;
796 ppnum_t phys_page;
797 addr64_t tmpaddr;
798
799 /*
800 * We calculate how many page frames we will have
801 * and then allocate the page structures in one chunk.
802 */
803
804 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
805 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
806 	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Each managed page costs PAGE_SIZE of memory plus one struct vm_page of bookkeeping, so divide by their sum */
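	/*
	 * Illustrative arithmetic (the struct size is an assumption): with
	 * 1 GiB of memory left, 4 KiB pages and a struct vm_page of about
	 * 100 bytes, npages is roughly 2^30 / 4196, i.e. ~255,000 entries,
	 * a bit fewer than the 262,144 raw frames because the vm_page array
	 * itself is carved out of the same memory.
	 */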
807
808 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
809
810 /*
811 * Initialize the page frames.
812 */
813 for (i = 0, pages_initialized = 0; i < npages; i++) {
814 if (!pmap_next_page(&phys_page))
815 break;
816 if (pages_initialized == 0 || phys_page < vm_page_lowest)
817 vm_page_lowest = phys_page;
818
819 vm_page_init(&vm_pages[i], phys_page, FALSE);
820 vm_page_pages++;
821 pages_initialized++;
822 }
823 vm_pages_count = pages_initialized;
824
825 /*
826 * Check if we want to initialize pages to a known value
827 */
828 fill = 0; /* Assume no fill */
829 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
830 #if DEBUG
831 /* This slows down booting the DEBUG kernel, particularly on
832 * large memory systems, but is worthwhile in deterministically
833 * trapping uninitialized memory usage.
834 */
835 if (fill == 0) {
836 fill = 1;
837 fillval = 0xDEB8F177;
838 }
839 #endif
840 if (fill)
841 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
842 // -debug code remove
843 if (2 == vm_himemory_mode) {
844 // free low -> high so high is preferred
845 for (i = 1; i <= pages_initialized; i++) {
846 			if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
847 vm_page_release(&vm_pages[i - 1]);
848 }
849 }
850 else
851 // debug code remove-
852
853 /*
854 * Release pages in reverse order so that physical pages
855 * initially get allocated in ascending addresses. This keeps
856 * the devices (which must address physical memory) happy if
857 * they require several consecutive pages.
858 */
859 for (i = pages_initialized; i > 0; i--) {
860 		if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
861 vm_page_release(&vm_pages[i - 1]);
862 }
863
864 #if 0
865 {
866 vm_page_t xx, xxo, xxl;
867 int i, j, k, l;
868
869 j = 0; /* (BRINGUP) */
870 xxl = 0;
871
872 for( i = 0; i < vm_colors; i++ ) {
873 queue_iterate(&vm_page_queue_free[i],
874 xx,
875 vm_page_t,
876 pageq) { /* BRINGUP */
877 j++; /* (BRINGUP) */
878 if(j > vm_page_free_count) { /* (BRINGUP) */
879 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
880 }
881
882 l = vm_page_free_count - j; /* (BRINGUP) */
883 k = 0; /* (BRINGUP) */
884
885 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
886
887 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
888 k++;
889 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
890 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
891 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
892 }
893 }
894
895 xxl = xx;
896 }
897 }
898
899 if(j != vm_page_free_count) { /* (BRINGUP) */
900 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
901 }
902 }
903 #endif
904
905
906 /*
907 * We have to re-align virtual_space_start,
908 * because pmap_steal_memory has been using it.
909 */
910
911 virtual_space_start = round_page(virtual_space_start);
912
913 *startp = virtual_space_start;
914 *endp = virtual_space_end;
915 }
916 #endif /* MACHINE_PAGES */
917
918 /*
919 * Routine: vm_page_module_init
920 * Purpose:
921 * Second initialization pass, to be done after
922 * the basic VM system is ready.
923 */
924 void
925 vm_page_module_init(void)
926 {
927 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
928 0, PAGE_SIZE, "vm pages");
929
930 #if ZONE_DEBUG
931 zone_debug_disable(vm_page_zone);
932 #endif /* ZONE_DEBUG */
933
934 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
935 zone_change(vm_page_zone, Z_EXPAND, FALSE);
936 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
937 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
938 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
939 /*
940 * Adjust zone statistics to account for the real pages allocated
941 * in vm_page_create(). [Q: is this really what we want?]
942 */
943 vm_page_zone->count += vm_page_pages;
944 vm_page_zone->sum_count += vm_page_pages;
945 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
946 }
947
948 /*
949 * Routine: vm_page_create
950 * Purpose:
951 * After the VM system is up, machine-dependent code
952 * may stumble across more physical memory. For example,
953 * memory that it was reserving for a frame buffer.
954 * vm_page_create turns this memory into available pages.
955 */
956
957 void
958 vm_page_create(
959 ppnum_t start,
960 ppnum_t end)
961 {
962 ppnum_t phys_page;
963 vm_page_t m;
964
965 for (phys_page = start;
966 phys_page < end;
967 phys_page++) {
968 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
969 == VM_PAGE_NULL)
970 vm_page_more_fictitious();
971
972 m->fictitious = FALSE;
973 pmap_clear_noencrypt(phys_page);
974
975 vm_page_pages++;
976 vm_page_release(m);
977 }
978 }
979
980 /*
981 * vm_page_hash:
982 *
983 * Distributes the object/offset key pair among hash buckets.
984 *
985 * NOTE: The bucket count must be a power of 2
986 */
987 #define vm_page_hash(object, offset) (\
988 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
989 & vm_page_hash_mask)
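/*
 * The trailing "& vm_page_hash_mask" only yields a well-distributed
 * bucket index because vm_page_bucket_count is a power of two, so the
 * mask (vm_page_bucket_count - 1) keeps every low-order bit of the
 * mixed key instead of biasing toward some buckets.
 */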
990
991
992 /*
993 * vm_page_insert: [ internal use only ]
994 *
995 * Inserts the given mem entry into the object/object-page
996 * table and object list.
997 *
998 * The object must be locked.
999 */
1000 void
1001 vm_page_insert(
1002 vm_page_t mem,
1003 vm_object_t object,
1004 vm_object_offset_t offset)
1005 {
1006 vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
1007 }
1008
1009 void
1010 vm_page_insert_internal(
1011 vm_page_t mem,
1012 vm_object_t object,
1013 vm_object_offset_t offset,
1014 boolean_t queues_lock_held,
1015 boolean_t insert_in_hash,
1016 boolean_t batch_pmap_op)
1017 {
1018 vm_page_bucket_t *bucket;
1019 lck_spin_t *bucket_lock;
1020 int hash_id;
1021
1022 XPR(XPR_VM_PAGE,
1023 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1024 object, offset, mem, 0,0);
1025 #if 0
1026 /*
1027 * we may not hold the page queue lock
1028 * so this check isn't safe to make
1029 */
1030 VM_PAGE_CHECK(mem);
1031 #endif
1032
1033 assert(page_aligned(offset));
1034
1035 if (object == vm_submap_object) {
1036 /* the vm_submap_object is only a placeholder for submaps */
1037 panic("vm_page_insert(vm_submap_object,0x%llx)\n", offset);
1038 }
1039
1040 vm_object_lock_assert_exclusive(object);
1041 #if DEBUG
1042 lck_mtx_assert(&vm_page_queue_lock,
1043 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1044 : LCK_MTX_ASSERT_NOTOWNED);
1045 #endif /* DEBUG */
1046
1047 if (insert_in_hash == TRUE) {
1048 #if DEBUG
1049 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1050 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1051 "already in (obj=%p,off=0x%llx)",
1052 mem, object, offset, mem->object, mem->offset);
1053 #endif
1054 assert(!object->internal || offset < object->vo_size);
1055
1056 /* only insert "pageout" pages into "pageout" objects,
1057 * and normal pages into normal objects */
1058 assert(object->pageout == mem->pageout);
1059
1060 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1061
1062 /*
1063 * Record the object/offset pair in this page
1064 */
1065
1066 mem->object = object;
1067 mem->offset = offset;
1068
1069 /*
1070 		 *	Insert it into the object/offset hash table
1071 */
1072 hash_id = vm_page_hash(object, offset);
1073 bucket = &vm_page_buckets[hash_id];
1074 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1075
1076 lck_spin_lock(bucket_lock);
1077
1078 mem->next = bucket->pages;
1079 bucket->pages = mem;
1080 #if MACH_PAGE_HASH_STATS
1081 if (++bucket->cur_count > bucket->hi_count)
1082 bucket->hi_count = bucket->cur_count;
1083 #endif /* MACH_PAGE_HASH_STATS */
1084
1085 lck_spin_unlock(bucket_lock);
1086 }
1087
1088 {
1089 unsigned int cache_attr;
1090
1091 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1092
1093 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1094 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1095 }
1096 }
1097 /*
1098 * Now link into the object's list of backed pages.
1099 */
1100 VM_PAGE_INSERT(mem, object);
1101 mem->tabled = TRUE;
1102
1103 /*
1104 * Show that the object has one more resident page.
1105 */
1106
1107 object->resident_page_count++;
1108 if (VM_PAGE_WIRED(mem)) {
1109 object->wired_page_count++;
1110 }
1111 assert(object->resident_page_count >= object->wired_page_count);
1112
1113 if (object->internal) {
1114 OSAddAtomic(1, &vm_page_internal_count);
1115 } else {
1116 OSAddAtomic(1, &vm_page_external_count);
1117 }
1118
1119 /*
1120 * It wouldn't make sense to insert a "reusable" page in
1121 * an object (the page would have been marked "reusable" only
1122 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1123 * in the object at that time).
1124 	 * But a page could be inserted in an "all_reusable" object, if
1125 * something faults it in (a vm_read() from another task or a
1126 * "use-after-free" issue in user space, for example). It can
1127 * also happen if we're relocating a page from that object to
1128 * a different physical page during a physically-contiguous
1129 * allocation.
1130 */
1131 assert(!mem->reusable);
1132 if (mem->object->all_reusable) {
1133 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1134 }
1135
1136 if (object->purgable == VM_PURGABLE_VOLATILE) {
1137 if (VM_PAGE_WIRED(mem)) {
1138 OSAddAtomic(1, &vm_page_purgeable_wired_count);
1139 } else {
1140 OSAddAtomic(1, &vm_page_purgeable_count);
1141 }
1142 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1143 mem->throttled) {
1144 /*
1145 * This page belongs to a purged VM object but hasn't
1146 * been purged (because it was "busy").
1147 * It's in the "throttled" queue and hence not
1148 * visible to vm_pageout_scan(). Move it to a pageable
1149 * queue, so that it can eventually be reclaimed, instead
1150 * of lingering in the "empty" object.
1151 */
1152 if (queues_lock_held == FALSE)
1153 vm_page_lockspin_queues();
1154 vm_page_deactivate(mem);
1155 if (queues_lock_held == FALSE)
1156 vm_page_unlock_queues();
1157 }
1158 }
1159
1160 /*
1161 * vm_page_replace:
1162 *
1163 * Exactly like vm_page_insert, except that we first
1164 * remove any existing page at the given offset in object.
1165 *
1166 * The object must be locked.
1167 */
1168 void
1169 vm_page_replace(
1170 register vm_page_t mem,
1171 register vm_object_t object,
1172 register vm_object_offset_t offset)
1173 {
1174 vm_page_bucket_t *bucket;
1175 vm_page_t found_m = VM_PAGE_NULL;
1176 lck_spin_t *bucket_lock;
1177 int hash_id;
1178
1179 #if 0
1180 /*
1181 * we don't hold the page queue lock
1182 * so this check isn't safe to make
1183 */
1184 VM_PAGE_CHECK(mem);
1185 #endif
1186 vm_object_lock_assert_exclusive(object);
1187 #if DEBUG
1188 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1189 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1190 "already in (obj=%p,off=0x%llx)",
1191 mem, object, offset, mem->object, mem->offset);
1192 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1193 #endif
1194 /*
1195 * Record the object/offset pair in this page
1196 */
1197
1198 mem->object = object;
1199 mem->offset = offset;
1200
1201 /*
1202 	 *	Insert it into the object/offset hash table,
1203 * replacing any page that might have been there.
1204 */
1205
1206 hash_id = vm_page_hash(object, offset);
1207 bucket = &vm_page_buckets[hash_id];
1208 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1209
1210 lck_spin_lock(bucket_lock);
1211
1212 if (bucket->pages) {
1213 vm_page_t *mp = &bucket->pages;
1214 vm_page_t m = *mp;
1215
1216 do {
1217 if (m->object == object && m->offset == offset) {
1218 /*
1219 * Remove old page from hash list
1220 */
1221 *mp = m->next;
1222
1223 found_m = m;
1224 break;
1225 }
1226 mp = &m->next;
1227 } while ((m = *mp));
1228
1229 mem->next = bucket->pages;
1230 } else {
1231 mem->next = VM_PAGE_NULL;
1232 }
1233 /*
1234 * insert new page at head of hash list
1235 */
1236 bucket->pages = mem;
1237
1238 lck_spin_unlock(bucket_lock);
1239
1240 if (found_m) {
1241 /*
1242 * there was already a page at the specified
1243 * offset for this object... remove it from
1244 * the object and free it back to the free list
1245 */
1246 vm_page_free_unlocked(found_m, FALSE);
1247 }
1248 vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1249 }
1250
1251 /*
1252 * vm_page_remove: [ internal use only ]
1253 *
1254 * Removes the given mem entry from the object/offset-page
1255 * table and the object page list.
1256 *
1257 * The object must be locked.
1258 */
1259
1260 void
1261 vm_page_remove(
1262 vm_page_t mem,
1263 boolean_t remove_from_hash)
1264 {
1265 vm_page_bucket_t *bucket;
1266 vm_page_t this;
1267 lck_spin_t *bucket_lock;
1268 int hash_id;
1269
1270 XPR(XPR_VM_PAGE,
1271 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1272 mem->object, mem->offset,
1273 mem, 0,0);
1274
1275 vm_object_lock_assert_exclusive(mem->object);
1276 assert(mem->tabled);
1277 assert(!mem->cleaning);
1278 assert(!mem->laundry);
1279 #if 0
1280 /*
1281 * we don't hold the page queue lock
1282 * so this check isn't safe to make
1283 */
1284 VM_PAGE_CHECK(mem);
1285 #endif
1286 if (remove_from_hash == TRUE) {
1287 /*
1288 		 *	Remove from the object/offset hash table
1289 */
1290 hash_id = vm_page_hash(mem->object, mem->offset);
1291 bucket = &vm_page_buckets[hash_id];
1292 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1293
1294 lck_spin_lock(bucket_lock);
1295
1296 if ((this = bucket->pages) == mem) {
1297 /* optimize for common case */
1298
1299 bucket->pages = mem->next;
1300 } else {
1301 vm_page_t *prev;
1302
1303 for (prev = &this->next;
1304 (this = *prev) != mem;
1305 prev = &this->next)
1306 continue;
1307 *prev = this->next;
1308 }
1309 #if MACH_PAGE_HASH_STATS
1310 bucket->cur_count--;
1311 #endif /* MACH_PAGE_HASH_STATS */
1312
1313 lck_spin_unlock(bucket_lock);
1314 }
1315 /*
1316 * Now remove from the object's list of backed pages.
1317 */
1318
1319 VM_PAGE_REMOVE(mem);
1320
1321 /*
1322 * And show that the object has one fewer resident
1323 * page.
1324 */
1325
1326 assert(mem->object->resident_page_count > 0);
1327 mem->object->resident_page_count--;
1328
1329 if (mem->object->internal) {
1330 assert(vm_page_internal_count);
1331 OSAddAtomic(-1, &vm_page_internal_count);
1332 } else {
1333 assert(vm_page_external_count);
1334 OSAddAtomic(-1, &vm_page_external_count);
1335 }
1336 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1337 if (mem->object->resident_page_count == 0)
1338 vm_object_cache_remove(mem->object);
1339 }
1340
1341 if (VM_PAGE_WIRED(mem)) {
1342 assert(mem->object->wired_page_count > 0);
1343 mem->object->wired_page_count--;
1344 }
1345 assert(mem->object->resident_page_count >=
1346 mem->object->wired_page_count);
1347 if (mem->reusable) {
1348 assert(mem->object->reusable_page_count > 0);
1349 mem->object->reusable_page_count--;
1350 assert(mem->object->reusable_page_count <=
1351 mem->object->resident_page_count);
1352 mem->reusable = FALSE;
1353 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1354 vm_page_stats_reusable.reused_remove++;
1355 } else if (mem->object->all_reusable) {
1356 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1357 vm_page_stats_reusable.reused_remove++;
1358 }
1359
1360 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1361 if (VM_PAGE_WIRED(mem)) {
1362 assert(vm_page_purgeable_wired_count > 0);
1363 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1364 } else {
1365 assert(vm_page_purgeable_count > 0);
1366 OSAddAtomic(-1, &vm_page_purgeable_count);
1367 }
1368 }
1369 if (mem->object->set_cache_attr == TRUE)
1370 pmap_set_cache_attributes(mem->phys_page, 0);
1371
1372 mem->tabled = FALSE;
1373 mem->object = VM_OBJECT_NULL;
1374 mem->offset = (vm_object_offset_t) -1;
1375 }
1376
1377
1378 /*
1379 * vm_page_lookup:
1380 *
1381 * Returns the page associated with the object/offset
1382 * pair specified; if none is found, VM_PAGE_NULL is returned.
1383 *
1384 * The object must be locked. No side effects.
1385 */
1386
1387 unsigned long vm_page_lookup_hint = 0;
1388 unsigned long vm_page_lookup_hint_next = 0;
1389 unsigned long vm_page_lookup_hint_prev = 0;
1390 unsigned long vm_page_lookup_hint_miss = 0;
1391 unsigned long vm_page_lookup_bucket_NULL = 0;
1392 unsigned long vm_page_lookup_miss = 0;
1393
1394
1395 vm_page_t
1396 vm_page_lookup(
1397 vm_object_t object,
1398 vm_object_offset_t offset)
1399 {
1400 vm_page_t mem;
1401 vm_page_bucket_t *bucket;
1402 queue_entry_t qe;
1403 lck_spin_t *bucket_lock;
1404 int hash_id;
1405
1406 vm_object_lock_assert_held(object);
1407 mem = object->memq_hint;
1408
1409 if (mem != VM_PAGE_NULL) {
1410 assert(mem->object == object);
1411
1412 if (mem->offset == offset) {
1413 vm_page_lookup_hint++;
1414 return mem;
1415 }
1416 qe = queue_next(&mem->listq);
1417
1418 if (! queue_end(&object->memq, qe)) {
1419 vm_page_t next_page;
1420
1421 next_page = (vm_page_t) qe;
1422 assert(next_page->object == object);
1423
1424 if (next_page->offset == offset) {
1425 vm_page_lookup_hint_next++;
1426 object->memq_hint = next_page; /* new hint */
1427 return next_page;
1428 }
1429 }
1430 qe = queue_prev(&mem->listq);
1431
1432 if (! queue_end(&object->memq, qe)) {
1433 vm_page_t prev_page;
1434
1435 prev_page = (vm_page_t) qe;
1436 assert(prev_page->object == object);
1437
1438 if (prev_page->offset == offset) {
1439 vm_page_lookup_hint_prev++;
1440 object->memq_hint = prev_page; /* new hint */
1441 return prev_page;
1442 }
1443 }
1444 }
1445 /*
1446 * Search the hash table for this object/offset pair
1447 */
1448 hash_id = vm_page_hash(object, offset);
1449 bucket = &vm_page_buckets[hash_id];
1450
1451 /*
1452 * since we hold the object lock, we are guaranteed that no
1453 * new pages can be inserted into this object... this in turn
1454 	 * guarantees that the page we're looking for can't exist
1455 	 * if the bucket it hashes to is currently NULL even when looked
1456 	 * at outside the scope of the hash bucket lock... this is a
1457 	 * really cheap optimization to avoid taking the lock
1458 */
1459 if (bucket->pages == VM_PAGE_NULL) {
1460 vm_page_lookup_bucket_NULL++;
1461
1462 return (VM_PAGE_NULL);
1463 }
1464 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1465
1466 lck_spin_lock(bucket_lock);
1467
1468 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem->next) {
1469 #if 0
1470 /*
1471 * we don't hold the page queue lock
1472 * so this check isn't safe to make
1473 */
1474 VM_PAGE_CHECK(mem);
1475 #endif
1476 if ((mem->object == object) && (mem->offset == offset))
1477 break;
1478 }
1479 lck_spin_unlock(bucket_lock);
1480
1481 if (mem != VM_PAGE_NULL) {
1482 if (object->memq_hint != VM_PAGE_NULL) {
1483 vm_page_lookup_hint_miss++;
1484 }
1485 assert(mem->object == object);
1486 object->memq_hint = mem;
1487 } else
1488 vm_page_lookup_miss++;
1489
1490 return(mem);
1491 }
1492
1493
1494 /*
1495 * vm_page_rename:
1496 *
1497 * Move the given memory entry from its
1498 * current object to the specified target object/offset.
1499 *
1500 * The object must be locked.
1501 */
1502 void
1503 vm_page_rename(
1504 register vm_page_t mem,
1505 register vm_object_t new_object,
1506 vm_object_offset_t new_offset,
1507 boolean_t encrypted_ok)
1508 {
1509 boolean_t internal_to_external, external_to_internal;
1510
1511 assert(mem->object != new_object);
1512
1513 /*
1514 * ENCRYPTED SWAP:
1515 * The encryption key is based on the page's memory object
1516 * (aka "pager") and paging offset. Moving the page to
1517 * another VM object changes its "pager" and "paging_offset"
1518 * so it has to be decrypted first, or we would lose the key.
1519 *
1520 * One exception is VM object collapsing, where we transfer pages
1521 * from one backing object to its parent object. This operation also
1522 * transfers the paging information, so the <pager,paging_offset> info
1523 * should remain consistent. The caller (vm_object_do_collapse())
1524 * sets "encrypted_ok" in this case.
1525 */
1526 if (!encrypted_ok && mem->encrypted) {
1527 panic("vm_page_rename: page %p is encrypted\n", mem);
1528 }
1529
1530 XPR(XPR_VM_PAGE,
1531 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1532 new_object, new_offset,
1533 mem, 0,0);
1534
1535 /*
1536 * Changes to mem->object require the page lock because
1537 * the pageout daemon uses that lock to get the object.
1538 */
1539 vm_page_lockspin_queues();
1540
1541 internal_to_external = FALSE;
1542 external_to_internal = FALSE;
1543
1544 if (mem->local) {
1545 /*
1546 * it's much easier to get the vm_page_pageable_xxx accounting correct
1547 * if we first move the page to the active queue... it's going to end
1548 		 * up there anyway, and we don't call vm_page_rename frequently enough
1549 * for this to matter.
1550 */
1551 VM_PAGE_QUEUES_REMOVE(mem);
1552 vm_page_activate(mem);
1553 }
1554 if (mem->active || mem->inactive || mem->speculative) {
1555 if (mem->object->internal && !new_object->internal) {
1556 internal_to_external = TRUE;
1557 }
1558 if (!mem->object->internal && new_object->internal) {
1559 external_to_internal = TRUE;
1560 }
1561 }
1562
1563 vm_page_remove(mem, TRUE);
1564 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1565
1566 if (internal_to_external) {
1567 vm_page_pageable_internal_count--;
1568 vm_page_pageable_external_count++;
1569 } else if (external_to_internal) {
1570 vm_page_pageable_external_count--;
1571 vm_page_pageable_internal_count++;
1572 }
1573
1574 vm_page_unlock_queues();
1575 }
1576
1577 /*
1578 * vm_page_init:
1579 *
1580 * Initialize the fields in a new page.
1581 * This takes a structure with random values and initializes it
1582 * so that it can be given to vm_page_release or vm_page_insert.
1583 */
1584 void
1585 vm_page_init(
1586 vm_page_t mem,
1587 ppnum_t phys_page,
1588 boolean_t lopage)
1589 {
1590 assert(phys_page);
1591
1592 #if DEBUG
1593 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1594 if (!(pmap_valid_page(phys_page))) {
1595 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1596 }
1597 }
1598 #endif
1599 *mem = vm_page_template;
1600 mem->phys_page = phys_page;
1601 #if 0
1602 /*
1603 * we're leaving this turned off for now... currently pages
1604 * come off the free list and are either immediately dirtied/referenced
1605 * due to zero-fill or COW faults, or are used to read or write files...
1606 * in the file I/O case, the UPL mechanism takes care of clearing
1607 * the state of the HW ref/mod bits in a somewhat fragile way.
1608 * Since we may change the way this works in the future (to toughen it up),
1609 * I'm leaving this as a reminder of where these bits could get cleared
1610 */
1611
1612 /*
1613 * make sure both the h/w referenced and modified bits are
1614 * clear at this point... we are especially dependent on
1615 * not finding a 'stale' h/w modified in a number of spots
1616 * once this page goes back into use
1617 */
1618 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1619 #endif
1620 mem->lopage = lopage;
1621 }
1622
1623 /*
1624 * vm_page_grab_fictitious:
1625 *
1626 * Remove a fictitious page from the free list.
1627 * Returns VM_PAGE_NULL if there are no free pages.
1628 */
1629 int c_vm_page_grab_fictitious = 0;
1630 int c_vm_page_grab_fictitious_failed = 0;
1631 int c_vm_page_release_fictitious = 0;
1632 int c_vm_page_more_fictitious = 0;
1633
1634 vm_page_t
1635 vm_page_grab_fictitious_common(
1636 ppnum_t phys_addr)
1637 {
1638 vm_page_t m;
1639
1640 if ((m = (vm_page_t)zget(vm_page_zone))) {
1641
1642 vm_page_init(m, phys_addr, FALSE);
1643 m->fictitious = TRUE;
1644
1645 c_vm_page_grab_fictitious++;
1646 } else
1647 c_vm_page_grab_fictitious_failed++;
1648
1649 return m;
1650 }
1651
1652 vm_page_t
1653 vm_page_grab_fictitious(void)
1654 {
1655 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1656 }
1657
1658 vm_page_t
1659 vm_page_grab_guard(void)
1660 {
1661 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1662 }
1663
1664
1665 /*
1666 * vm_page_release_fictitious:
1667 *
1668 * Release a fictitious page to the zone pool
1669 */
1670 void
1671 vm_page_release_fictitious(
1672 vm_page_t m)
1673 {
1674 assert(!m->free);
1675 assert(m->fictitious);
1676 assert(m->phys_page == vm_page_fictitious_addr ||
1677 m->phys_page == vm_page_guard_addr);
1678
1679 c_vm_page_release_fictitious++;
1680
1681 zfree(vm_page_zone, m);
1682 }
1683
1684 /*
1685 * vm_page_more_fictitious:
1686 *
1687 * Add more fictitious pages to the zone.
1688  *	Allowed to block. This routine is intimately tied up
1689 * with the zones code, for several reasons:
1690 * 1. we need to carve some page structures out of physical
1691 * memory before zones work, so they _cannot_ come from
1692 * the zone_map.
1693 * 2. the zone needs to be collectable in order to prevent
1694 * growth without bound. These structures are used by
1695 * the device pager (by the hundreds and thousands), as
1696 * private pages for pageout, and as blocking pages for
1697 * pagein. Temporary bursts in demand should not result in
1698 * permanent allocation of a resource.
1699 * 3. To smooth allocation humps, we allocate single pages
1700 * with kernel_memory_allocate(), and cram them into the
1701 * zone.
1702 */
1703
1704 void vm_page_more_fictitious(void)
1705 {
1706 vm_offset_t addr;
1707 kern_return_t retval;
1708
1709 c_vm_page_more_fictitious++;
1710
1711 /*
1712 * Allocate a single page from the zone_map. Do not wait if no physical
1713 * pages are immediately available, and do not zero the space. We need
1714 * our own blocking lock here to prevent having multiple,
1715 * simultaneous requests from piling up on the zone_map lock. Exactly
1716 * one (of our) threads should be potentially waiting on the map lock.
1717 * If winner is not vm-privileged, then the page allocation will fail,
1718 * and it will temporarily block here in the vm_page_wait().
1719 */
1720 lck_mtx_lock(&vm_page_alloc_lock);
1721 /*
1722 * If another thread allocated space, just bail out now.
1723 */
1724 if (zone_free_count(vm_page_zone) > 5) {
1725 /*
1726 * The number "5" is a small number that is larger than the
1727 * number of fictitious pages that any single caller will
1728 * attempt to allocate. Otherwise, a thread will attempt to
1729 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1730 * release all of the resources and locks already acquired,
1731 * and then call this routine. This routine finds the pages
1732 * that the caller released, so fails to allocate new space.
1733 * The process repeats infinitely. The largest known number
1734 * of fictitious pages required in this manner is 2. 5 is
1735 * simply a somewhat larger number.
1736 */
1737 lck_mtx_unlock(&vm_page_alloc_lock);
1738 return;
1739 }
1740
1741 retval = kernel_memory_allocate(zone_map,
1742 &addr, PAGE_SIZE, VM_PROT_ALL,
1743 KMA_KOBJECT|KMA_NOPAGEWAIT);
1744 if (retval != KERN_SUCCESS) {
1745 /*
1746 * No page was available. Drop the
1747 * lock to give another thread a chance at it, and
1748 * wait for the pageout daemon to make progress.
1749 */
1750 lck_mtx_unlock(&vm_page_alloc_lock);
1751 vm_page_wait(THREAD_UNINT);
1752 return;
1753 }
1754
1755 /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
1756 OSAddAtomic64(1, &(vm_page_zone->page_count));
1757
1758 zcram(vm_page_zone, addr, PAGE_SIZE);
1759
1760 lck_mtx_unlock(&vm_page_alloc_lock);
1761 }
1762
1763
1764 /*
1765 * vm_pool_low():
1766 *
1767 * Return true if it is not likely that a non-vm_privileged thread
1768 * can get memory without blocking. Advisory only, since the
1769 * situation may change under us.
1770 */
1771 int
1772 vm_pool_low(void)
1773 {
1774 /* No locking, at worst we will fib. */
1775 return( vm_page_free_count <= vm_page_free_reserved );
1776 }
1777
1778
1779
1780 /*
1781 * this is an interface to support bring-up of drivers
1782 * on platforms with physical memory > 4G...
1783 */
1784 int vm_himemory_mode = 0;
1785
1786
1787 /*
1788 * this interface exists to support hardware controllers
1789 * incapable of generating DMAs with more than 32 bits
1790 * of address on platforms with physical memory > 4G...
1791 */
1792 unsigned int vm_lopages_allocated_q = 0;
1793 unsigned int vm_lopages_allocated_cpm_success = 0;
1794 unsigned int vm_lopages_allocated_cpm_failed = 0;
1795 queue_head_t vm_lopage_queue_free;
1796
1797 vm_page_t
1798 vm_page_grablo(void)
1799 {
1800 vm_page_t mem;
1801
1802 if (vm_lopage_needed == FALSE)
1803 return (vm_page_grab());
1804
1805 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1806
1807 if ( !queue_empty(&vm_lopage_queue_free)) {
1808 queue_remove_first(&vm_lopage_queue_free,
1809 mem,
1810 vm_page_t,
1811 pageq);
1812 assert(vm_lopage_free_count);
1813
1814 vm_lopage_free_count--;
1815 vm_lopages_allocated_q++;
1816
1817 if (vm_lopage_free_count < vm_lopage_lowater)
1818 vm_lopage_refill = TRUE;
1819
1820 lck_mtx_unlock(&vm_page_queue_free_lock);
1821 } else {
1822 lck_mtx_unlock(&vm_page_queue_free_lock);
1823
1824 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1825
1826 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1827 vm_lopages_allocated_cpm_failed++;
1828 lck_mtx_unlock(&vm_page_queue_free_lock);
1829
1830 return (VM_PAGE_NULL);
1831 }
1832 mem->busy = TRUE;
1833
1834 vm_page_lockspin_queues();
1835
1836 mem->gobbled = FALSE;
1837 vm_page_gobble_count--;
1838 vm_page_wire_count--;
1839
1840 vm_lopages_allocated_cpm_success++;
1841 vm_page_unlock_queues();
1842 }
1843 assert(mem->busy);
1844 assert(!mem->free);
1845 assert(!mem->pmapped);
1846 assert(!mem->wpmapped);
1847 assert(!pmap_is_noencrypt(mem->phys_page));
1848
1849 mem->pageq.next = NULL;
1850 mem->pageq.prev = NULL;
1851
1852 return (mem);
1853 }
1854
1855
1856 /*
1857 * vm_page_grab:
1858 *
1859 * first try to grab a page from the per-cpu free list...
1860 * this must be done while pre-emption is disabled... if
1861 * a page is available, we're done...
1862 * if no page is available, grab the vm_page_queue_free_lock
1863 * and see if current number of free pages would allow us
1864 * to grab at least 1... if not, return VM_PAGE_NULL as before...
1865 * if there are pages available, disable preemption and
1866 * recheck the state of the per-cpu free list... we could
1867 * have been preempted and moved to a different cpu, or
1868 * some other thread could have re-filled it... if still
1869 * empty, figure out how many pages we can steal from the
1870 * global free queue and move to the per-cpu queue...
1871 * return 1 of these pages when done... only wakeup the
1872 * pageout_scan thread if we moved pages from the global
1873 * list... no need for the wakeup if we've satisfied the
1874 * request from the per-cpu queue.
1875 */
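/*
 * Typical caller pattern (an illustrative sketch, not part of the original
 * file): a non-privileged caller must be prepared for VM_PAGE_NULL when the
 * free pool is at or below vm_page_free_reserved, and normally retries after
 * waiting for the pageout daemon to replenish the pool:
 *
 *	vm_page_t m;
 *
 *	while ((m = vm_page_grab()) == VM_PAGE_NULL)
 *		VM_PAGE_WAIT();
 */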
1876
1877 #define COLOR_GROUPS_TO_STEAL 4
1878
1879
1880 vm_page_t
1881 vm_page_grab( void )
1882 {
1883 vm_page_t mem;
1884
1885
1886 disable_preemption();
1887
1888 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1889 return_page_from_cpu_list:
1890 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
1891 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
1892 mem->pageq.next = NULL;
1893
1894 enable_preemption();
1895
1896 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
1897 assert(mem->tabled == FALSE);
1898 assert(mem->object == VM_OBJECT_NULL);
1899 assert(!mem->laundry);
1900 assert(!mem->free);
1901 assert(pmap_verify_free(mem->phys_page));
1902 assert(mem->busy);
1903 assert(!mem->encrypted);
1904 assert(!mem->pmapped);
1905 assert(!mem->wpmapped);
1906 assert(!mem->active);
1907 assert(!mem->inactive);
1908 assert(!mem->throttled);
1909 assert(!mem->speculative);
1910 assert(!pmap_is_noencrypt(mem->phys_page));
1911
1912 return mem;
1913 }
1914 enable_preemption();
1915
1916
1917 /*
1918 * Optionally produce warnings if the wire or gobble
1919 * counts exceed some threshold.
1920 */
1921 if (vm_page_wire_count_warning > 0
1922 && vm_page_wire_count >= vm_page_wire_count_warning) {
1923 printf("mk: vm_page_grab(): high wired page count of %d\n",
1924 vm_page_wire_count);
1925 assert(vm_page_wire_count < vm_page_wire_count_warning);
1926 }
1927 if (vm_page_gobble_count_warning > 0
1928 && vm_page_gobble_count >= vm_page_gobble_count_warning) {
1929 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
1930 vm_page_gobble_count);
1931 assert(vm_page_gobble_count < vm_page_gobble_count_warning);
1932 }
1933
1934 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1935
1936 /*
1937 * Only let privileged threads (involved in pageout)
1938 * dip into the reserved pool.
1939 */
1940 if ((vm_page_free_count < vm_page_free_reserved) &&
1941 !(current_thread()->options & TH_OPT_VMPRIV)) {
1942 lck_mtx_unlock(&vm_page_queue_free_lock);
1943 mem = VM_PAGE_NULL;
1944 }
1945 else {
1946 vm_page_t head;
1947 vm_page_t tail;
1948 unsigned int pages_to_steal;
1949 unsigned int color;
1950
1951 while ( vm_page_free_count == 0 ) {
1952
1953 lck_mtx_unlock(&vm_page_queue_free_lock);
1954 /*
1955 * must be a privileged thread to be
1956 * in this state since a non-privileged
1957 * thread would have bailed if we were
1958 * under the vm_page_free_reserved mark
1959 */
1960 VM_PAGE_WAIT();
1961 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1962 }
1963
1964 disable_preemption();
1965
1966 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
1967 lck_mtx_unlock(&vm_page_queue_free_lock);
1968
1969 /*
1970 * we got preempted and moved to another processor
1971 * or we got preempted and someone else ran and filled the cache
1972 */
1973 goto return_page_from_cpu_list;
1974 }
1975 if (vm_page_free_count <= vm_page_free_reserved)
1976 pages_to_steal = 1;
1977 else {
1978 pages_to_steal = COLOR_GROUPS_TO_STEAL * vm_colors;
1979
1980 if (pages_to_steal > (vm_page_free_count - vm_page_free_reserved))
1981 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
1982 }
1983 color = PROCESSOR_DATA(current_processor(), start_color);
1984 head = tail = NULL;
1985
1986 while (pages_to_steal--) {
1987 if (--vm_page_free_count < vm_page_free_count_minimum)
1988 vm_page_free_count_minimum = vm_page_free_count;
1989
1990 while (queue_empty(&vm_page_queue_free[color]))
1991 color = (color + 1) & vm_color_mask;
1992
1993 queue_remove_first(&vm_page_queue_free[color],
1994 mem,
1995 vm_page_t,
1996 pageq);
1997 mem->pageq.next = NULL;
1998 mem->pageq.prev = NULL;
1999
2000 assert(!mem->active);
2001 assert(!mem->inactive);
2002 assert(!mem->throttled);
2003 assert(!mem->speculative);
2004
2005 color = (color + 1) & vm_color_mask;
2006
2007 if (head == NULL)
2008 head = mem;
2009 else
2010 tail->pageq.next = (queue_t)mem;
2011 tail = mem;
2012
2013 mem->pageq.prev = NULL;
2014 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2015 assert(mem->tabled == FALSE);
2016 assert(mem->object == VM_OBJECT_NULL);
2017 assert(!mem->laundry);
2018 assert(mem->free);
2019 mem->free = FALSE;
2020
2021 assert(pmap_verify_free(mem->phys_page));
2022 assert(mem->busy);
2023 assert(!mem->free);
2024 assert(!mem->encrypted);
2025 assert(!mem->pmapped);
2026 assert(!mem->wpmapped);
2027 assert(!pmap_is_noencrypt(mem->phys_page));
2028 }
2029 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2030 PROCESSOR_DATA(current_processor(), start_color) = color;
2031
2032 /*
2033 * satisfy this request
2034 */
2035 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2036 mem = head;
2037 mem->pageq.next = NULL;
2038
2039 lck_mtx_unlock(&vm_page_queue_free_lock);
2040
2041 enable_preemption();
2042 }
2043 /*
2044 * Decide if we should poke the pageout daemon.
2045 * We do this if the free count is less than the low
2046 * water mark, or if the free count is less than the high
2047 * water mark (but above the low water mark) and the inactive
2048 * count is less than its target.
2049 *
2050 * We don't have the counts locked ... if they change a little,
2051 * it doesn't really matter.
2052 */
2053 if ((vm_page_free_count < vm_page_free_min) ||
2054 ((vm_page_free_count < vm_page_free_target) &&
2055 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2056 thread_wakeup((event_t) &vm_page_free_wanted);
2057
2058 VM_CHECK_MEMORYSTATUS;
2059
2060 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2061
2062 return mem;
2063 }
2064
2065 /*
2066 * vm_page_release:
2067 *
2068 * Return a page to the free list.
2069 */
2070
2071 void
2072 vm_page_release(
2073 register vm_page_t mem)
2074 {
2075 unsigned int color;
2076 int need_wakeup = 0;
2077 int need_priv_wakeup = 0;
2078
2079
2080 assert(!mem->private && !mem->fictitious);
2081 if (vm_page_free_verify) {
2082 assert(pmap_verify_free(mem->phys_page));
2083 }
2084 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2085
2086 pmap_clear_noencrypt(mem->phys_page);
2087
2088 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2089 #if DEBUG
2090 if (mem->free)
2091 panic("vm_page_release");
2092 #endif
2093
2094 assert(mem->busy);
2095 assert(!mem->laundry);
2096 assert(mem->object == VM_OBJECT_NULL);
2097 assert(mem->pageq.next == NULL &&
2098 mem->pageq.prev == NULL);
2099 assert(mem->listq.next == NULL &&
2100 mem->listq.prev == NULL);
2101
2102 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2103 vm_lopage_free_count < vm_lopage_free_limit &&
2104 mem->phys_page < max_valid_low_ppnum) {
2105 /*
2106 * this exists to support hardware controllers
2107 * incapable of generating DMAs with more than 32 bits
2108 * of address on platforms with physical memory > 4G...
2109 */
2110 queue_enter_first(&vm_lopage_queue_free,
2111 mem,
2112 vm_page_t,
2113 pageq);
2114 vm_lopage_free_count++;
2115
2116 if (vm_lopage_free_count >= vm_lopage_free_limit)
2117 vm_lopage_refill = FALSE;
2118
2119 mem->lopage = TRUE;
2120 } else {
2121 mem->lopage = FALSE;
2122 mem->free = TRUE;
2123
2124 color = mem->phys_page & vm_color_mask;
2125 queue_enter_first(&vm_page_queue_free[color],
2126 mem,
2127 vm_page_t,
2128 pageq);
2129 vm_page_free_count++;
2130 /*
2131 * Check if we should wake up someone waiting for page.
2132 * But don't bother waking them unless they can allocate.
2133 *
2134 * We wakeup only one thread, to prevent starvation.
2135 * Because the scheduling system handles wait queues FIFO,
2136 * if we wakeup all waiting threads, one greedy thread
2137 * can starve multiple niceguy threads. When the threads
2138 * all wake up, the greedy thread runs first, grabs the page,
2139 * and waits for another page. It will be the first to run
2140 * when the next page is freed.
2141 *
2142 * However, there is a slight danger here.
2143 * The thread we wake might not use the free page.
2144 * Then the other threads could wait indefinitely
2145 * while the page goes unused. To forestall this,
2146 * the pageout daemon will keep making free pages
2147 * as long as vm_page_free_wanted is non-zero.
2148 */
2149
2150 assert(vm_page_free_count > 0);
2151 if (vm_page_free_wanted_privileged > 0) {
2152 vm_page_free_wanted_privileged--;
2153 need_priv_wakeup = 1;
2154 } else if (vm_page_free_wanted > 0 &&
2155 vm_page_free_count > vm_page_free_reserved) {
2156 vm_page_free_wanted--;
2157 need_wakeup = 1;
2158 }
2159 }
2160 lck_mtx_unlock(&vm_page_queue_free_lock);
2161
2162 if (need_priv_wakeup)
2163 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2164 else if (need_wakeup)
2165 thread_wakeup_one((event_t) &vm_page_free_count);
2166
2167 VM_CHECK_MEMORYSTATUS;
2168 }
2169
2170 /*
2171 * vm_page_wait:
2172 *
2173 * Wait for a page to become available.
2174 * If there are plenty of free pages, then we don't sleep.
2175 *
2176 * Returns:
2177 * TRUE: There may be another page, try again
2178 * FALSE: We were interrupted out of our wait, don't try again
2179 */
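/*
 * Illustrative use of the return value (a sketch; the surrounding error
 * handling is an assumption, not taken from this file). Interruptible
 * callers must honor a FALSE return and give up rather than spin:
 *
 *	for (;;) {
 *		mem = vm_page_grab();
 *		if (mem != VM_PAGE_NULL)
 *			break;
 *		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
 *			return KERN_ABORTED;	// we were interrupted; bail out
 *	}
 *
 * Callers that wait with THREAD_UNINT (as in the zone-fill path earlier in
 * this file) can simply loop until a page becomes available.
 */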
2180
2181 boolean_t
2182 vm_page_wait(
2183 int interruptible )
2184 {
2185 /*
2186 * We can't use vm_page_free_reserved to make this
2187 * determination. Consider: some thread might
2188 * need to allocate two pages. The first allocation
2189 * succeeds, the second fails. After the first page is freed,
2190 * a call to vm_page_wait must really block.
2191 */
2192 kern_return_t wait_result;
2193 int need_wakeup = 0;
2194 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2195
2196 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2197
2198 if (is_privileged && vm_page_free_count) {
2199 lck_mtx_unlock(&vm_page_queue_free_lock);
2200 return TRUE;
2201 }
2202 if (vm_page_free_count < vm_page_free_target) {
2203
2204 if (is_privileged) {
2205 if (vm_page_free_wanted_privileged++ == 0)
2206 need_wakeup = 1;
2207 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2208 } else {
2209 if (vm_page_free_wanted++ == 0)
2210 need_wakeup = 1;
2211 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2212 }
2213 lck_mtx_unlock(&vm_page_queue_free_lock);
2214 counter(c_vm_page_wait_block++);
2215
2216 if (need_wakeup)
2217 thread_wakeup((event_t)&vm_page_free_wanted);
2218
2219 if (wait_result == THREAD_WAITING) {
2220 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2221 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
2222 wait_result = thread_block(THREAD_CONTINUE_NULL);
2223 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2224 }
2225
2226 return(wait_result == THREAD_AWAKENED);
2227 } else {
2228 lck_mtx_unlock(&vm_page_queue_free_lock);
2229 return TRUE;
2230 }
2231 }
2232
2233 /*
2234 * vm_page_alloc:
2235 *
2236 * Allocate and return a memory cell associated
2237 * with this VM object/offset pair.
2238 *
2239 * Object must be locked.
2240 */
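/*
 * Sketch of a caller (illustrative only; the retry policy shown is an
 * assumption, not taken from this file):
 *
 *	vm_object_lock(object);			// exclusive, per the requirement above
 *	m = vm_page_alloc(object, offset);
 *	if (m == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		VM_PAGE_WAIT();			// let the pageout daemon make progress
 *		// ... re-take the lock and retry, or fail the operation ...
 *	}
 */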
2241
2242 vm_page_t
2243 vm_page_alloc(
2244 vm_object_t object,
2245 vm_object_offset_t offset)
2246 {
2247 register vm_page_t mem;
2248
2249 vm_object_lock_assert_exclusive(object);
2250 mem = vm_page_grab();
2251 if (mem == VM_PAGE_NULL)
2252 return VM_PAGE_NULL;
2253
2254 vm_page_insert(mem, object, offset);
2255
2256 return(mem);
2257 }
2258
2259 vm_page_t
2260 vm_page_alloclo(
2261 vm_object_t object,
2262 vm_object_offset_t offset)
2263 {
2264 register vm_page_t mem;
2265
2266 vm_object_lock_assert_exclusive(object);
2267 mem = vm_page_grablo();
2268 if (mem == VM_PAGE_NULL)
2269 return VM_PAGE_NULL;
2270
2271 vm_page_insert(mem, object, offset);
2272
2273 return(mem);
2274 }
2275
2276
2277 /*
2278 * vm_page_alloc_guard:
2279 *
2280 * Allocate a fictitious page which will be used
2281 * as a guard page. The page will be inserted into
2282 * the object and returned to the caller.
2283 */
2284
2285 vm_page_t
2286 vm_page_alloc_guard(
2287 vm_object_t object,
2288 vm_object_offset_t offset)
2289 {
2290 register vm_page_t mem;
2291
2292 vm_object_lock_assert_exclusive(object);
2293 mem = vm_page_grab_guard();
2294 if (mem == VM_PAGE_NULL)
2295 return VM_PAGE_NULL;
2296
2297 vm_page_insert(mem, object, offset);
2298
2299 return(mem);
2300 }
2301
2302
2303 counter(unsigned int c_laundry_pages_freed = 0;)
2304
2305 /*
2306 * vm_page_free_prepare:
2307 *
2308 * Removes page from any queue it may be on
2309 * and disassociates it from its VM object.
2310 *
2311 * Object and page queues must be locked prior to entry.
2312 */
2313 static void
2314 vm_page_free_prepare(
2315 vm_page_t mem)
2316 {
2317 vm_page_free_prepare_queues(mem);
2318 vm_page_free_prepare_object(mem, TRUE);
2319 }
2320
2321
2322 void
2323 vm_page_free_prepare_queues(
2324 vm_page_t mem)
2325 {
2326 VM_PAGE_CHECK(mem);
2327 assert(!mem->free);
2328 assert(!mem->cleaning);
2329 #if DEBUG
2330 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2331 if (mem->free)
2332 panic("vm_page_free: freeing page on free list\n");
2333 #endif
2334 if (mem->object) {
2335 vm_object_lock_assert_exclusive(mem->object);
2336 }
2337 if (mem->laundry) {
2338 /*
2339 * We may have to free a page while it's being laundered
2340 * if we lost its pager (due to a forced unmount, for example).
2341 * We need to call vm_pageout_steal_laundry() before removing
2342 * the page from its VM object, so that we can remove it
2343 * from its pageout queue and adjust the laundry accounting
2344 */
2345 vm_pageout_steal_laundry(mem, TRUE);
2346 counter(++c_laundry_pages_freed);
2347 }
2348
2349 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2350
2351 if (VM_PAGE_WIRED(mem)) {
2352 if (mem->object) {
2353 assert(mem->object->wired_page_count > 0);
2354 mem->object->wired_page_count--;
2355 assert(mem->object->resident_page_count >=
2356 mem->object->wired_page_count);
2357
2358 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2359 OSAddAtomic(+1, &vm_page_purgeable_count);
2360 assert(vm_page_purgeable_wired_count > 0);
2361 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2362 }
2363 }
2364 if (!mem->private && !mem->fictitious)
2365 vm_page_wire_count--;
2366 mem->wire_count = 0;
2367 assert(!mem->gobbled);
2368 } else if (mem->gobbled) {
2369 if (!mem->private && !mem->fictitious)
2370 vm_page_wire_count--;
2371 vm_page_gobble_count--;
2372 }
2373 }
2374
2375
2376 void
2377 vm_page_free_prepare_object(
2378 vm_page_t mem,
2379 boolean_t remove_from_hash)
2380 {
2381 if (mem->tabled)
2382 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2383
2384 PAGE_WAKEUP(mem); /* clears wanted */
2385
2386 if (mem->private) {
2387 mem->private = FALSE;
2388 mem->fictitious = TRUE;
2389 mem->phys_page = vm_page_fictitious_addr;
2390 }
2391 if ( !mem->fictitious) {
2392 vm_page_init(mem, mem->phys_page, mem->lopage);
2393 }
2394 }
2395
2396
2397 /*
2398 * vm_page_free:
2399 *
2400 * Returns the given page to the free list,
2401 * disassociating it from any VM object.
2402 *
2403 * Object and page queues must be locked prior to entry.
2404 */
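/*
 * Illustrative pairing (a sketch, not from a caller in this file): the caller
 * holds both locks around vm_page_free(), whereas vm_page_free_unlocked()
 * below takes the page queues lock itself:
 *
 *	vm_object_lock(object);
 *	vm_page_lock_queues();
 *	vm_page_free(mem);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 */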
2405 void
2406 vm_page_free(
2407 vm_page_t mem)
2408 {
2409 vm_page_free_prepare(mem);
2410
2411 if (mem->fictitious) {
2412 vm_page_release_fictitious(mem);
2413 } else {
2414 vm_page_release(mem);
2415 }
2416 }
2417
2418
2419 void
2420 vm_page_free_unlocked(
2421 vm_page_t mem,
2422 boolean_t remove_from_hash)
2423 {
2424 vm_page_lockspin_queues();
2425 vm_page_free_prepare_queues(mem);
2426 vm_page_unlock_queues();
2427
2428 vm_page_free_prepare_object(mem, remove_from_hash);
2429
2430 if (mem->fictitious) {
2431 vm_page_release_fictitious(mem);
2432 } else {
2433 vm_page_release(mem);
2434 }
2435 }
2436
2437
2438 /*
2439 * Free a list of pages. The list can be up to several hundred pages,
2440 * as blocked up by vm_pageout_scan().
2441 * The big win is not having to take the free list lock once
2442 * per page.
2443 */
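/*
 * Illustrative sketch of how a caller batches pages for this routine (the
 * variable names are assumptions; the chaining through pageq.next mirrors
 * what the loop below does when it builds local_freeq). The pages must
 * already be off the paging queues and unwired, per the asserts below:
 *
 *	vm_page_t freeq = VM_PAGE_NULL;
 *
 *	// for each page 'mem' to be freed, push it onto the singly linked list
 *	mem->pageq.next = (queue_entry_t) freeq;
 *	freeq = mem;
 *
 *	// one call releases the whole batch, taking the free-list lock once
 *	// per 64-page chunk instead of once per page
 *	if (freeq != VM_PAGE_NULL)
 *		vm_page_free_list(freeq, TRUE);	// TRUE: also disassociate from the object
 */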
2444 void
2445 vm_page_free_list(
2446 vm_page_t freeq,
2447 boolean_t prepare_object)
2448 {
2449 vm_page_t mem;
2450 vm_page_t nxt;
2451 vm_page_t local_freeq;
2452 int pg_count;
2453
2454 while (freeq) {
2455
2456 pg_count = 0;
2457 local_freeq = VM_PAGE_NULL;
2458 mem = freeq;
2459
2460 /*
2461 * break up the processing into smaller chunks so
2462 * that we can 'pipeline' the pages onto the
2463 * free list w/o introducing too much
2464 * contention on the global free queue lock
2465 */
2466 while (mem && pg_count < 64) {
2467
2468 assert(!mem->inactive);
2469 assert(!mem->active);
2470 assert(!mem->throttled);
2471 assert(!mem->free);
2472 assert(!mem->speculative);
2473 assert(!VM_PAGE_WIRED(mem));
2474 assert(mem->pageq.prev == NULL);
2475
2476 nxt = (vm_page_t)(mem->pageq.next);
2477
2478 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2479 assert(pmap_verify_free(mem->phys_page));
2480 }
2481 if (prepare_object == TRUE)
2482 vm_page_free_prepare_object(mem, TRUE);
2483
2484 if (!mem->fictitious) {
2485 assert(mem->busy);
2486
2487 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2488 vm_lopage_free_count < vm_lopage_free_limit &&
2489 mem->phys_page < max_valid_low_ppnum) {
2490 mem->pageq.next = NULL;
2491 vm_page_release(mem);
2492 } else {
2493 /*
2494 * IMPORTANT: we can't set the page "free" here
2495 * because that would make the page eligible for
2496 * a physically-contiguous allocation (see
2497 * vm_page_find_contiguous()) right away (we don't
2498 * hold the vm_page_queue_free lock). That would
2499 * cause trouble because the page is not actually
2500 * in the free queue yet...
2501 */
2502 mem->pageq.next = (queue_entry_t)local_freeq;
2503 local_freeq = mem;
2504 pg_count++;
2505
2506 pmap_clear_noencrypt(mem->phys_page);
2507 }
2508 } else {
2509 assert(mem->phys_page == vm_page_fictitious_addr ||
2510 mem->phys_page == vm_page_guard_addr);
2511 vm_page_release_fictitious(mem);
2512 }
2513 mem = nxt;
2514 }
2515 freeq = mem;
2516
2517 if ( (mem = local_freeq) ) {
2518 unsigned int avail_free_count;
2519 unsigned int need_wakeup = 0;
2520 unsigned int need_priv_wakeup = 0;
2521
2522 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2523
2524 while (mem) {
2525 int color;
2526
2527 nxt = (vm_page_t)(mem->pageq.next);
2528
2529 assert(!mem->free);
2530 assert(mem->busy);
2531 mem->free = TRUE;
2532
2533 color = mem->phys_page & vm_color_mask;
2534 queue_enter_first(&vm_page_queue_free[color],
2535 mem,
2536 vm_page_t,
2537 pageq);
2538 mem = nxt;
2539 }
2540 vm_page_free_count += pg_count;
2541 avail_free_count = vm_page_free_count;
2542
2543 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2544
2545 if (avail_free_count < vm_page_free_wanted_privileged) {
2546 need_priv_wakeup = avail_free_count;
2547 vm_page_free_wanted_privileged -= avail_free_count;
2548 avail_free_count = 0;
2549 } else {
2550 need_priv_wakeup = vm_page_free_wanted_privileged;
2551 avail_free_count -= vm_page_free_wanted_privileged;
2552 vm_page_free_wanted_privileged = 0;
2553 }
2554 }
2555 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2556 unsigned int available_pages;
2557
2558 available_pages = avail_free_count - vm_page_free_reserved;
2559
2560 if (available_pages >= vm_page_free_wanted) {
2561 need_wakeup = vm_page_free_wanted;
2562 vm_page_free_wanted = 0;
2563 } else {
2564 need_wakeup = available_pages;
2565 vm_page_free_wanted -= available_pages;
2566 }
2567 }
2568 lck_mtx_unlock(&vm_page_queue_free_lock);
2569
2570 if (need_priv_wakeup != 0) {
2571 /*
2572 * There shouldn't be that many VM-privileged threads,
2573 * so let's wake them all up, even if we don't quite
2574 * have enough pages to satisfy them all.
2575 */
2576 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2577 }
2578 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2579 /*
2580 * We don't expect to have any more waiters
2581 * after this, so let's wake them all up at
2582 * once.
2583 */
2584 thread_wakeup((event_t) &vm_page_free_count);
2585 } else for (; need_wakeup != 0; need_wakeup--) {
2586 /*
2587 * Wake up one waiter per page we just released.
2588 */
2589 thread_wakeup_one((event_t) &vm_page_free_count);
2590 }
2591
2592 VM_CHECK_MEMORYSTATUS;
2593 }
2594 }
2595 }
2596
2597
2598 /*
2599 * vm_page_wire:
2600 *
2601 * Mark this page as wired down by yet
2602 * another map, removing it from paging queues
2603 * as necessary.
2604 *
2605 * The page's object and the page queues must be locked.
2606 */
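/*
 * Illustrative locking discipline for wiring and later unwiring a page (a
 * sketch, not taken verbatim from a caller in this file):
 *
 *	vm_object_lock(object);			// the page's object, exclusive
 *	vm_page_lockspin_queues();
 *	vm_page_wire(mem);
 *	vm_page_unlock_queues();
 *	vm_object_unlock(object);
 *
 *	// ... later, with the same locks held ...
 *	vm_page_unwire(mem, TRUE);		// TRUE: re-queue the page when wire_count drops to 0
 */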
2607 void
2608 vm_page_wire(
2609 register vm_page_t mem)
2610 {
2611
2612 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2613
2614 VM_PAGE_CHECK(mem);
2615 if (mem->object) {
2616 vm_object_lock_assert_exclusive(mem->object);
2617 } else {
2618 /*
2619 * In theory, the page should be in an object before it
2620 * gets wired, since we need to hold the object lock
2621 * to update some fields in the page structure.
2622 * However, some code (i386 pmap, for example) might want
2623 * to wire a page before it gets inserted into an object.
2624 * That's somewhat OK, as long as nobody else can get to
2625 * that page and update it at the same time.
2626 */
2627 }
2628 #if DEBUG
2629 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2630 #endif
2631 if ( !VM_PAGE_WIRED(mem)) {
2632
2633 if (mem->pageout_queue) {
2634 mem->pageout = FALSE;
2635 vm_pageout_throttle_up(mem);
2636 }
2637 VM_PAGE_QUEUES_REMOVE(mem);
2638
2639 if (mem->object) {
2640 mem->object->wired_page_count++;
2641 assert(mem->object->resident_page_count >=
2642 mem->object->wired_page_count);
2643 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2644 assert(vm_page_purgeable_count > 0);
2645 OSAddAtomic(-1, &vm_page_purgeable_count);
2646 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2647 }
2648 if (mem->object->all_reusable) {
2649 /*
2650 * Wired pages are not counted as "re-usable"
2651 * in "all_reusable" VM objects, so nothing
2652 * to do here.
2653 */
2654 } else if (mem->reusable) {
2655 /*
2656 * This page is not "re-usable" when it's
2657 * wired, so adjust its state and the
2658 * accounting.
2659 */
2660 vm_object_reuse_pages(mem->object,
2661 mem->offset,
2662 mem->offset+PAGE_SIZE_64,
2663 FALSE);
2664 }
2665 }
2666 assert(!mem->reusable);
2667
2668 if (!mem->private && !mem->fictitious && !mem->gobbled)
2669 vm_page_wire_count++;
2670 if (mem->gobbled)
2671 vm_page_gobble_count--;
2672 mem->gobbled = FALSE;
2673
2674 VM_CHECK_MEMORYSTATUS;
2675
2676 /*
2677 * ENCRYPTED SWAP:
2678 * The page could be encrypted, but
2679 * we don't have to decrypt it here
2680 * because we don't guarantee that the
2681 * data is actually valid at this point.
2682 * The page will get decrypted in
2683 * vm_fault_wire() if needed.
2684 */
2685 }
2686 assert(!mem->gobbled);
2687 mem->wire_count++;
2688 VM_PAGE_CHECK(mem);
2689 }
2690
2691 /*
2692 * vm_page_gobble:
2693 *
2694 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2695 *
2696 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2697 */
2698 void
2699 vm_page_gobble(
2700 register vm_page_t mem)
2701 {
2702 vm_page_lockspin_queues();
2703 VM_PAGE_CHECK(mem);
2704
2705 assert(!mem->gobbled);
2706 assert( !VM_PAGE_WIRED(mem));
2707
2708 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2709 if (!mem->private && !mem->fictitious)
2710 vm_page_wire_count++;
2711 }
2712 vm_page_gobble_count++;
2713 mem->gobbled = TRUE;
2714 vm_page_unlock_queues();
2715 }
2716
2717 /*
2718 * vm_page_unwire:
2719 *
2720 * Release one wiring of this page, potentially
2721 * enabling it to be paged again.
2722 *
2723 * The page's object and the page queues must be locked.
2724 */
2725 void
2726 vm_page_unwire(
2727 vm_page_t mem,
2728 boolean_t queueit)
2729 {
2730
2731 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2732
2733 VM_PAGE_CHECK(mem);
2734 assert(VM_PAGE_WIRED(mem));
2735 assert(mem->object != VM_OBJECT_NULL);
2736 #if DEBUG
2737 vm_object_lock_assert_exclusive(mem->object);
2738 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2739 #endif
2740 if (--mem->wire_count == 0) {
2741 assert(!mem->private && !mem->fictitious);
2742 vm_page_wire_count--;
2743 assert(mem->object->wired_page_count > 0);
2744 mem->object->wired_page_count--;
2745 assert(mem->object->resident_page_count >=
2746 mem->object->wired_page_count);
2747 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2748 OSAddAtomic(+1, &vm_page_purgeable_count);
2749 assert(vm_page_purgeable_wired_count > 0);
2750 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2751 }
2752 assert(!mem->laundry);
2753 assert(mem->object != kernel_object);
2754 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
2755
2756 if (queueit == TRUE) {
2757 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
2758 vm_page_deactivate(mem);
2759 } else {
2760 vm_page_activate(mem);
2761 }
2762 }
2763
2764 VM_CHECK_MEMORYSTATUS;
2765
2766 }
2767 VM_PAGE_CHECK(mem);
2768 }
2769
2770 /*
2771 * vm_page_deactivate:
2772 *
2773 * Returns the given page to the inactive list,
2774 * indicating that no physical maps have access
2775 * to this page. [Used by the physical mapping system.]
2776 *
2777 * The page queues must be locked.
2778 */
2779 void
2780 vm_page_deactivate(
2781 vm_page_t m)
2782 {
2783 vm_page_deactivate_internal(m, TRUE);
2784 }
2785
2786
2787 void
2788 vm_page_deactivate_internal(
2789 vm_page_t m,
2790 boolean_t clear_hw_reference)
2791 {
2792
2793 VM_PAGE_CHECK(m);
2794 assert(m->object != kernel_object);
2795 assert(m->phys_page != vm_page_guard_addr);
2796
2797 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
2798 #if DEBUG
2799 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2800 #endif
2801 /*
2802 * This page is no longer very interesting. If it was
2803 * interesting (active or inactive/referenced), then we
2804 * clear the reference bit and (re)enter it in the
2805 * inactive queue. Note wired pages should not have
2806 * their reference bit cleared.
2807 */
2808 assert ( !(m->absent && !m->unusual));
2809
2810 if (m->gobbled) { /* can this happen? */
2811 assert( !VM_PAGE_WIRED(m));
2812
2813 if (!m->private && !m->fictitious)
2814 vm_page_wire_count--;
2815 vm_page_gobble_count--;
2816 m->gobbled = FALSE;
2817 }
2818 /*
2819 * if this page is currently on the pageout queue, we can't do the
2820 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2821 * and we can't remove it manually since we would need the object lock
2822 * (which is not required here) to decrement the activity_in_progress
2823 * reference which is held on the object while the page is in the pageout queue...
2824 * just let the normal laundry processing proceed
2825 */
2826 if (m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
2827 return;
2828
2829 if (!m->absent && clear_hw_reference == TRUE)
2830 pmap_clear_reference(m->phys_page);
2831
2832 m->reference = FALSE;
2833 m->no_cache = FALSE;
2834
2835 if (!m->inactive) {
2836 VM_PAGE_QUEUES_REMOVE(m);
2837
2838 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2839 m->dirty && m->object->internal &&
2840 (m->object->purgable == VM_PURGABLE_DENY ||
2841 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2842 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2843 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2844 m->throttled = TRUE;
2845 vm_page_throttled_count++;
2846 } else {
2847 if (m->object->named && m->object->ref_count == 1) {
2848 vm_page_speculate(m, FALSE);
2849 #if DEVELOPMENT || DEBUG
2850 vm_page_speculative_recreated++;
2851 #endif
2852 } else {
2853 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
2854 }
2855 }
2856 }
2857 }
2858
2859 /*
2860 * vm_page_enqueue_cleaned
2861 *
2862 * Put the page on the cleaned queue, mark it cleaned, etc.
2863 * Being on the cleaned queue (and having m->clean_queue set)
2864 * does ** NOT ** guarantee that the page is clean!
2865 *
2866 * Call with the queues lock held.
2867 */
2868
2869 void vm_page_enqueue_cleaned(vm_page_t m)
2870 {
2871 assert(m->phys_page != vm_page_guard_addr);
2872 #if DEBUG
2873 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2874 #endif
2875 assert( !(m->absent && !m->unusual));
2876
2877 if (m->gobbled) {
2878 assert( !VM_PAGE_WIRED(m));
2879 if (!m->private && !m->fictitious)
2880 vm_page_wire_count--;
2881 vm_page_gobble_count--;
2882 m->gobbled = FALSE;
2883 }
2884 /*
2885 * if this page is currently on the pageout queue, we can't do the
2886 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2887 * and we can't remove it manually since we would need the object lock
2888 * (which is not required here) to decrement the activity_in_progress
2889 * reference which is held on the object while the page is in the pageout queue...
2890 * just let the normal laundry processing proceed
2891 */
2892 if (m->clean_queue || m->pageout_queue || m->private || m->fictitious)
2893 return;
2894
2895 VM_PAGE_QUEUES_REMOVE(m);
2896
2897 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
2898 m->clean_queue = TRUE;
2899 vm_page_cleaned_count++;
2900
2901 m->inactive = TRUE;
2902 vm_page_inactive_count++;
2903 if (m->object->internal) {
2904 vm_page_pageable_internal_count++;
2905 } else {
2906 vm_page_pageable_external_count++;
2907 }
2908
2909 vm_pageout_enqueued_cleaned++;
2910 }
2911
2912 /*
2913 * vm_page_activate:
2914 *
2915 * Put the specified page on the active list (if appropriate).
2916 *
2917 * The page queues must be locked.
2918 */
2919
2920 #if CONFIG_JETSAM
2921 #if LATENCY_JETSAM
2922 extern struct vm_page jetsam_latency_page[NUM_OF_JETSAM_LATENCY_TOKENS];
2923 #endif /* LATENCY_JETSAM */
2924 #endif /* CONFIG_JETSAM */
2925
2926 void
2927 vm_page_activate(
2928 register vm_page_t m)
2929 {
2930 VM_PAGE_CHECK(m);
2931 #ifdef FIXME_4778297
2932 assert(m->object != kernel_object);
2933 #endif
2934 assert(m->phys_page != vm_page_guard_addr);
2935 #if DEBUG
2936 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2937 #endif
2938 assert( !(m->absent && !m->unusual));
2939
2940 if (m->gobbled) {
2941 assert( !VM_PAGE_WIRED(m));
2942 if (!m->private && !m->fictitious)
2943 vm_page_wire_count--;
2944 vm_page_gobble_count--;
2945 m->gobbled = FALSE;
2946 }
2947 /*
2948 * if this page is currently on the pageout queue, we can't do the
2949 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
2950 * and we can't remove it manually since we would need the object lock
2951 * (which is not required here) to decrement the activity_in_progress
2952 * reference which is held on the object while the page is in the pageout queue...
2953 * just let the normal laundry processing proceed
2954 */
2955 if (m->pageout_queue || m->private || m->fictitious || m->compressor)
2956 return;
2957
2958 #if DEBUG
2959 if (m->active)
2960 panic("vm_page_activate: already active");
2961 #endif
2962
2963 if (m->speculative) {
2964 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
2965 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
2966 }
2967
2968 VM_PAGE_QUEUES_REMOVE(m);
2969
2970 if ( !VM_PAGE_WIRED(m)) {
2971
2972 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
2973 m->dirty && m->object->internal &&
2974 (m->object->purgable == VM_PURGABLE_DENY ||
2975 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
2976 m->object->purgable == VM_PURGABLE_VOLATILE)) {
2977 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
2978 m->throttled = TRUE;
2979 vm_page_throttled_count++;
2980 } else {
2981 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
2982 m->active = TRUE;
2983 vm_page_active_count++;
2984 if (m->object->internal) {
2985 vm_page_pageable_internal_count++;
2986 } else {
2987 vm_page_pageable_external_count++;
2988 }
2989 #if LATENCY_JETSAM
2990 if (jlp_init) {
2991 uint64_t now = mach_absolute_time();
2992 uint64_t delta = now - jlp_time;
2993 clock_sec_t jl_secs = 0;
2994 clock_usec_t jl_usecs = 0;
2995 vm_page_t jlp;
2996
2997 absolutetime_to_microtime(delta, &jl_secs, &jl_usecs);
2998
2999 jl_usecs += jl_secs * USEC_PER_SEC;
3000 if (jl_usecs >= JETSAM_LATENCY_TOKEN_AGE) {
3001
3002 jlp = &jetsam_latency_page[jlp_current];
3003 if (jlp->active) {
3004 queue_remove(&vm_page_queue_active, jlp, vm_page_t, pageq);
3005 }
3006 queue_enter(&vm_page_queue_active, jlp, vm_page_t, pageq);
3007
3008 jlp->active = TRUE;
3009
3010 jlp->offset = now;
3011 jlp_time = jlp->offset;
3012
3013 if(++jlp_current == NUM_OF_JETSAM_LATENCY_TOKENS) {
3014 jlp_current = 0;
3015 }
3016
3017 }
3018 }
3019 #endif /* LATENCY_JETSAM */
3020 }
3021 m->reference = TRUE;
3022 m->no_cache = FALSE;
3023 }
3024 VM_PAGE_CHECK(m);
3025 }
3026
3027
3028 /*
3029 * vm_page_speculate:
3030 *
3031 * Put the specified page on the speculative list (if appropriate).
3032 *
3033 * The page queues must be locked.
3034 */
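/*
 * A worked example of the aging arithmetic used below (illustrative; the
 * 500 ms figure is hypothetical, vm_page_speculative_q_age_ms is a tunable):
 *
 *	age_ts.tv_sec  = 500 / 1000                          = 0
 *	age_ts.tv_nsec = (500 % 1000) * 1000 * NSEC_PER_USEC = 500,000,000 ns
 *
 * That interval is added to the current system time to form the expiry of the
 * current aging bin; once the deadline passes, newly speculated pages start a
 * fresh bin and the bin being recycled, if not empty, is drained into the
 * aged queue via vm_page_speculate_ageit().
 */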
3035 void
3036 vm_page_speculate(
3037 vm_page_t m,
3038 boolean_t new)
3039 {
3040 struct vm_speculative_age_q *aq;
3041
3042 VM_PAGE_CHECK(m);
3043 assert(m->object != kernel_object);
3044 assert(m->phys_page != vm_page_guard_addr);
3045 #if DEBUG
3046 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3047 #endif
3048 assert( !(m->absent && !m->unusual));
3049
3050 /*
3051 * if this page is currently on the pageout queue, we can't do the
3052 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3053 * and we can't remove it manually since we would need the object lock
3054 * (which is not required here) to decrement the activity_in_progress
3055 * reference which is held on the object while the page is in the pageout queue...
3056 * just let the normal laundry processing proceed
3057 */
3058 if (m->pageout_queue || m->private || m->fictitious || m->compressor)
3059 return;
3060
3061 VM_PAGE_QUEUES_REMOVE(m);
3062
3063 if ( !VM_PAGE_WIRED(m)) {
3064 mach_timespec_t ts;
3065 clock_sec_t sec;
3066 clock_nsec_t nsec;
3067
3068 clock_get_system_nanotime(&sec, &nsec);
3069 ts.tv_sec = (unsigned int) sec;
3070 ts.tv_nsec = nsec;
3071
3072 if (vm_page_speculative_count == 0) {
3073
3074 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3075 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3076
3077 aq = &vm_page_queue_speculative[speculative_age_index];
3078
3079 /*
3080 * set the timer to begin a new group
3081 */
3082 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3083 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3084
3085 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3086 } else {
3087 aq = &vm_page_queue_speculative[speculative_age_index];
3088
3089 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3090
3091 speculative_age_index++;
3092
3093 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3094 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3095 if (speculative_age_index == speculative_steal_index) {
3096 speculative_steal_index = speculative_age_index + 1;
3097
3098 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3099 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3100 }
3101 aq = &vm_page_queue_speculative[speculative_age_index];
3102
3103 if (!queue_empty(&aq->age_q))
3104 vm_page_speculate_ageit(aq);
3105
3106 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3107 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3108
3109 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3110 }
3111 }
3112 enqueue_tail(&aq->age_q, &m->pageq);
3113 m->speculative = TRUE;
3114 vm_page_speculative_count++;
3115 if (m->object->internal) {
3116 vm_page_pageable_internal_count++;
3117 } else {
3118 vm_page_pageable_external_count++;
3119 }
3120
3121 if (new == TRUE) {
3122 vm_object_lock_assert_exclusive(m->object);
3123
3124 m->object->pages_created++;
3125 #if DEVELOPMENT || DEBUG
3126 vm_page_speculative_created++;
3127 #endif
3128 }
3129 }
3130 VM_PAGE_CHECK(m);
3131 }
3132
3133
3134 /*
3135 * move pages from the specified aging bin to
3136 * the speculative bin that pageout_scan claims from
3137 *
3138 * The page queues must be locked.
3139 */
3140 void
3141 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3142 {
3143 struct vm_speculative_age_q *sq;
3144 vm_page_t t;
3145
3146 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3147
3148 if (queue_empty(&sq->age_q)) {
3149 sq->age_q.next = aq->age_q.next;
3150 sq->age_q.prev = aq->age_q.prev;
3151
3152 t = (vm_page_t)sq->age_q.next;
3153 t->pageq.prev = &sq->age_q;
3154
3155 t = (vm_page_t)sq->age_q.prev;
3156 t->pageq.next = &sq->age_q;
3157 } else {
3158 t = (vm_page_t)sq->age_q.prev;
3159 t->pageq.next = aq->age_q.next;
3160
3161 t = (vm_page_t)aq->age_q.next;
3162 t->pageq.prev = sq->age_q.prev;
3163
3164 t = (vm_page_t)aq->age_q.prev;
3165 t->pageq.next = &sq->age_q;
3166
3167 sq->age_q.prev = aq->age_q.prev;
3168 }
3169 queue_init(&aq->age_q);
3170 }
3171
3172
3173 void
3174 vm_page_lru(
3175 vm_page_t m)
3176 {
3177 VM_PAGE_CHECK(m);
3178 assert(m->object != kernel_object);
3179 assert(m->phys_page != vm_page_guard_addr);
3180
3181 #if DEBUG
3182 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3183 #endif
3184 /*
3185 * if this page is currently on the pageout queue, we can't do the
3186 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3187 * and we can't remove it manually since we would need the object lock
3188 * (which is not required here) to decrement the activity_in_progress
3189 * reference which is held on the object while the page is in the pageout queue...
3190 * just let the normal laundry processing proceed
3191 */
3192 if (m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
3193 return;
3194
3195 m->no_cache = FALSE;
3196
3197 VM_PAGE_QUEUES_REMOVE(m);
3198
3199 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3200 }
3201
3202
3203 void
3204 vm_page_reactivate_all_throttled(void)
3205 {
3206 vm_page_t first_throttled, last_throttled;
3207 vm_page_t first_active;
3208 vm_page_t m;
3209 int extra_active_count;
3210 int extra_internal_count, extra_external_count;
3211
3212 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3213 return;
3214
3215 extra_active_count = 0;
3216 extra_internal_count = 0;
3217 extra_external_count = 0;
3218 vm_page_lock_queues();
3219 if (! queue_empty(&vm_page_queue_throttled)) {
3220 /*
3221 * Switch "throttled" pages to "active".
3222 */
3223 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3224 VM_PAGE_CHECK(m);
3225 assert(m->throttled);
3226 assert(!m->active);
3227 assert(!m->inactive);
3228 assert(!m->speculative);
3229 assert(!VM_PAGE_WIRED(m));
3230
3231 extra_active_count++;
3232 if (m->object->internal) {
3233 extra_internal_count++;
3234 } else {
3235 extra_external_count++;
3236 }
3237
3238 m->throttled = FALSE;
3239 m->active = TRUE;
3240 VM_PAGE_CHECK(m);
3241 }
3242
3243 /*
3244 * Transfer the entire throttled queue to the regular LRU page queues.
3245 * We insert it at the head of the active queue, so that these pages
3246 * get re-evaluated by the LRU algorithm first, since they've been
3247 * completely out of it until now.
3248 */
3249 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3250 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3251 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3252 if (queue_empty(&vm_page_queue_active)) {
3253 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3254 } else {
3255 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3256 }
3257 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3258 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3259 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3260
3261 #if DEBUG
3262 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3263 #endif
3264 queue_init(&vm_page_queue_throttled);
3265 /*
3266 * Adjust the global page counts.
3267 */
3268 vm_page_active_count += extra_active_count;
3269 vm_page_pageable_internal_count += extra_internal_count;
3270 vm_page_pageable_external_count += extra_external_count;
3271 vm_page_throttled_count = 0;
3272 }
3273 assert(vm_page_throttled_count == 0);
3274 assert(queue_empty(&vm_page_queue_throttled));
3275 vm_page_unlock_queues();
3276 }
3277
3278
3279 /*
3280 * move pages from the indicated local queue to the global active queue
3281 * it's ok to fail if we're below the hard limit and force == FALSE
3282 * the nolocks == TRUE case is to allow this function to be run on
3283 * the hibernate path
3284 */
3285
3286 void
3287 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3288 {
3289 struct vpl *lq;
3290 vm_page_t first_local, last_local;
3291 vm_page_t first_active;
3292 vm_page_t m;
3293 uint32_t count = 0;
3294
3295 if (vm_page_local_q == NULL)
3296 return;
3297
3298 lq = &vm_page_local_q[lid].vpl_un.vpl;
3299
3300 if (nolocks == FALSE) {
3301 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3302 if ( !vm_page_trylockspin_queues())
3303 return;
3304 } else
3305 vm_page_lockspin_queues();
3306
3307 VPL_LOCK(&lq->vpl_lock);
3308 }
3309 if (lq->vpl_count) {
3310 /*
3311 * Switch "local" pages to "active".
3312 */
3313 assert(!queue_empty(&lq->vpl_queue));
3314
3315 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3316 VM_PAGE_CHECK(m);
3317 assert(m->local);
3318 assert(!m->active);
3319 assert(!m->inactive);
3320 assert(!m->speculative);
3321 assert(!VM_PAGE_WIRED(m));
3322 assert(!m->throttled);
3323 assert(!m->fictitious);
3324
3325 if (m->local_id != lid)
3326 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3327
3328 m->local_id = 0;
3329 m->local = FALSE;
3330 m->active = TRUE;
3331 VM_PAGE_CHECK(m);
3332
3333 count++;
3334 }
3335 if (count != lq->vpl_count)
3336 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3337
3338 /*
3339 * Transfer the entire local queue to the regular LRU page queues.
3340 */
3341 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3342 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3343 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3344
3345 if (queue_empty(&vm_page_queue_active)) {
3346 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3347 } else {
3348 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3349 }
3350 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3351 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3352 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3353
3354 queue_init(&lq->vpl_queue);
3355 /*
3356 * Adjust the global page counts.
3357 */
3358 vm_page_active_count += lq->vpl_count;
3359 vm_page_pageable_internal_count += lq->vpl_internal_count;
3360 vm_page_pageable_external_count += lq->vpl_external_count;
3361 lq->vpl_count = 0;
3362 lq->vpl_internal_count = 0;
3363 lq->vpl_external_count = 0;
3364 }
3365 assert(queue_empty(&lq->vpl_queue));
3366
3367 if (nolocks == FALSE) {
3368 VPL_UNLOCK(&lq->vpl_lock);
3369 vm_page_unlock_queues();
3370 }
3371 }
3372
3373 /*
3374 * vm_page_part_zero_fill:
3375 *
3376 * Zero-fill a part of the page.
3377 */
3378 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3379 void
3380 vm_page_part_zero_fill(
3381 vm_page_t m,
3382 vm_offset_t m_pa,
3383 vm_size_t len)
3384 {
3385
3386 #if 0
3387 /*
3388 * we don't hold the page queue lock
3389 * so this check isn't safe to make
3390 */
3391 VM_PAGE_CHECK(m);
3392 #endif
3393
3394 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3395 pmap_zero_part_page(m->phys_page, m_pa, len);
3396 #else
3397 vm_page_t tmp;
3398 while (1) {
3399 tmp = vm_page_grab();
3400 if (tmp == VM_PAGE_NULL) {
3401 vm_page_wait(THREAD_UNINT);
3402 continue;
3403 }
3404 break;
3405 }
3406 vm_page_zero_fill(tmp);
3407 if(m_pa != 0) {
3408 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3409 }
3410 if((m_pa + len) < PAGE_SIZE) {
3411 vm_page_part_copy(m, m_pa + len, tmp,
3412 m_pa + len, PAGE_SIZE - (m_pa + len));
3413 }
3414 vm_page_copy(tmp,m);
3415 VM_PAGE_FREE(tmp);
3416 #endif
3417
3418 }
3419
3420 /*
3421 * vm_page_zero_fill:
3422 *
3423 * Zero-fill the specified page.
3424 */
3425 void
3426 vm_page_zero_fill(
3427 vm_page_t m)
3428 {
3429 XPR(XPR_VM_PAGE,
3430 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3431 m->object, m->offset, m, 0,0);
3432 #if 0
3433 /*
3434 * we don't hold the page queue lock
3435 * so this check isn't safe to make
3436 */
3437 VM_PAGE_CHECK(m);
3438 #endif
3439
3440 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3441 pmap_zero_page(m->phys_page);
3442 }
3443
3444 /*
3445 * vm_page_part_copy:
3446 *
3447 * copy part of one page to another
3448 */
3449
3450 void
3451 vm_page_part_copy(
3452 vm_page_t src_m,
3453 vm_offset_t src_pa,
3454 vm_page_t dst_m,
3455 vm_offset_t dst_pa,
3456 vm_size_t len)
3457 {
3458 #if 0
3459 /*
3460 * we don't hold the page queue lock
3461 * so this check isn't safe to make
3462 */
3463 VM_PAGE_CHECK(src_m);
3464 VM_PAGE_CHECK(dst_m);
3465 #endif
3466 pmap_copy_part_page(src_m->phys_page, src_pa,
3467 dst_m->phys_page, dst_pa, len);
3468 }
3469
3470 /*
3471 * vm_page_copy:
3472 *
3473 * Copy one page to another
3474 *
3475 * ENCRYPTED SWAP:
3476 * The source page should not be encrypted. The caller should
3477 * make sure the page is decrypted first, if necessary.
3478 */
3479
3480 int vm_page_copy_cs_validations = 0;
3481 int vm_page_copy_cs_tainted = 0;
3482
3483 void
3484 vm_page_copy(
3485 vm_page_t src_m,
3486 vm_page_t dest_m)
3487 {
3488 XPR(XPR_VM_PAGE,
3489 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3490 src_m->object, src_m->offset,
3491 dest_m->object, dest_m->offset,
3492 0);
3493 #if 0
3494 /*
3495 * we don't hold the page queue lock
3496 * so this check isn't safe to make
3497 */
3498 VM_PAGE_CHECK(src_m);
3499 VM_PAGE_CHECK(dest_m);
3500 #endif
3501 vm_object_lock_assert_held(src_m->object);
3502
3503 /*
3504 * ENCRYPTED SWAP:
3505 * The source page should not be encrypted at this point.
3506 * The destination page will therefore not contain encrypted
3507 * data after the copy.
3508 */
3509 if (src_m->encrypted) {
3510 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3511 }
3512 dest_m->encrypted = FALSE;
3513
3514 if (src_m->object != VM_OBJECT_NULL &&
3515 src_m->object->code_signed) {
3516 /*
3517 * We're copying a page from a code-signed object.
3518 * Whoever ends up mapping the copy page might care about
3519 * the original page's integrity, so let's validate the
3520 * source page now.
3521 */
3522 vm_page_copy_cs_validations++;
3523 vm_page_validate_cs(src_m);
3524 }
3525
3526 if (vm_page_is_slideable(src_m)) {
3527 boolean_t was_busy = src_m->busy;
3528 src_m->busy = TRUE;
3529 (void) vm_page_slide(src_m, 0);
3530 assert(src_m->busy);
3531 if (!was_busy) {
3532 PAGE_WAKEUP_DONE(src_m);
3533 }
3534 }
3535
3536 /*
3537 * Propagate the cs_tainted bit to the copy page. Do not propagate
3538 * the cs_validated bit.
3539 */
3540 dest_m->cs_tainted = src_m->cs_tainted;
3541 if (dest_m->cs_tainted) {
3542 vm_page_copy_cs_tainted++;
3543 }
3544 dest_m->slid = src_m->slid;
3545 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3546 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3547 }
3548
3549 #if MACH_ASSERT
3550 static void
3551 _vm_page_print(
3552 vm_page_t p)
3553 {
3554 printf("vm_page %p: \n", p);
3555 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3556 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3557 printf(" next=%p\n", p->next);
3558 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3559 printf(" wire_count=%u\n", p->wire_count);
3560
3561 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3562 (p->local ? "" : "!"),
3563 (p->inactive ? "" : "!"),
3564 (p->active ? "" : "!"),
3565 (p->pageout_queue ? "" : "!"),
3566 (p->speculative ? "" : "!"),
3567 (p->laundry ? "" : "!"));
3568 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3569 (p->free ? "" : "!"),
3570 (p->reference ? "" : "!"),
3571 (p->gobbled ? "" : "!"),
3572 (p->private ? "" : "!"),
3573 (p->throttled ? "" : "!"));
3574 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3575 (p->busy ? "" : "!"),
3576 (p->wanted ? "" : "!"),
3577 (p->tabled ? "" : "!"),
3578 (p->fictitious ? "" : "!"),
3579 (p->pmapped ? "" : "!"),
3580 (p->wpmapped ? "" : "!"));
3581 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3582 (p->pageout ? "" : "!"),
3583 (p->absent ? "" : "!"),
3584 (p->error ? "" : "!"),
3585 (p->dirty ? "" : "!"),
3586 (p->cleaning ? "" : "!"),
3587 (p->precious ? "" : "!"),
3588 (p->clustered ? "" : "!"));
3589 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3590 (p->overwriting ? "" : "!"),
3591 (p->restart ? "" : "!"),
3592 (p->unusual ? "" : "!"),
3593 (p->encrypted ? "" : "!"),
3594 (p->encrypted_cleaning ? "" : "!"));
3595 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
3596 (p->cs_validated ? "" : "!"),
3597 (p->cs_tainted ? "" : "!"),
3598 (p->no_cache ? "" : "!"));
3599
3600 printf("phys_page=0x%x\n", p->phys_page);
3601 }
3602
3603 /*
3604 * Check that the list of pages is ordered by
3605 * ascending physical address and has no holes.
3606 */
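/*
 * For example (illustrative): a valid 4-page run whose first page has
 * phys_page 0x1000 must be chained via NEXT_PAGE() through pages whose
 * phys_page values are 0x1001, 0x1002 and 0x1003, in that order; any gap,
 * reordering, or wrong count panics.
 */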
3607 static int
3608 vm_page_verify_contiguous(
3609 vm_page_t pages,
3610 unsigned int npages)
3611 {
3612 register vm_page_t m;
3613 unsigned int page_count;
3614 vm_offset_t prev_addr;
3615
3616 prev_addr = pages->phys_page;
3617 page_count = 1;
3618 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3619 if (m->phys_page != prev_addr + 1) {
3620 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3621 m, (long)prev_addr, m->phys_page);
3622 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3623 panic("vm_page_verify_contiguous: not contiguous!");
3624 }
3625 prev_addr = m->phys_page;
3626 ++page_count;
3627 }
3628 if (page_count != npages) {
3629 printf("pages %p actual count 0x%x but requested 0x%x\n",
3630 pages, page_count, npages);
3631 panic("vm_page_verify_contiguous: count error");
3632 }
3633 return 1;
3634 }
3635
3636
3637 /*
3638 * Check the free lists for proper length etc.
3639 */
3640 static unsigned int
3641 vm_page_verify_free_list(
3642 queue_head_t *vm_page_queue,
3643 unsigned int color,
3644 vm_page_t look_for_page,
3645 boolean_t expect_page)
3646 {
3647 unsigned int npages;
3648 vm_page_t m;
3649 vm_page_t prev_m;
3650 boolean_t found_page;
3651
3652 found_page = FALSE;
3653 npages = 0;
3654 prev_m = (vm_page_t) vm_page_queue;
3655 queue_iterate(vm_page_queue,
3656 m,
3657 vm_page_t,
3658 pageq) {
3659
3660 if (m == look_for_page) {
3661 found_page = TRUE;
3662 }
3663 if ((vm_page_t) m->pageq.prev != prev_m)
3664 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3665 color, npages, m, m->pageq.prev, prev_m);
3666 if ( ! m->busy )
3667 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3668 color, npages, m);
3669 if (color != (unsigned int) -1) {
3670 if ((m->phys_page & vm_color_mask) != color)
3671 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3672 color, npages, m, m->phys_page & vm_color_mask, color);
3673 if ( ! m->free )
3674 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3675 color, npages, m);
3676 }
3677 ++npages;
3678 prev_m = m;
3679 }
3680 if (look_for_page != VM_PAGE_NULL) {
3681 unsigned int other_color;
3682
3683 if (expect_page && !found_page) {
3684 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3685 color, npages, look_for_page, look_for_page->phys_page);
3686 _vm_page_print(look_for_page);
3687 for (other_color = 0;
3688 other_color < vm_colors;
3689 other_color++) {
3690 if (other_color == color)
3691 continue;
3692 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3693 other_color, look_for_page, FALSE);
3694 }
3695 if (color == (unsigned int) -1) {
3696 vm_page_verify_free_list(&vm_lopage_queue_free,
3697 (unsigned int) -1, look_for_page, FALSE);
3698 }
3699 panic("vm_page_verify_free_list(color=%u)\n", color);
3700 }
3701 if (!expect_page && found_page) {
3702 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3703 color, npages, look_for_page, look_for_page->phys_page);
3704 }
3705 }
3706 return npages;
3707 }
3708
3709 static boolean_t vm_page_verify_free_lists_enabled = FALSE;
3710 static void
3711 vm_page_verify_free_lists( void )
3712 {
3713 unsigned int color, npages, nlopages;
3714
3715 if (! vm_page_verify_free_lists_enabled)
3716 return;
3717
3718 npages = 0;
3719
3720 lck_mtx_lock(&vm_page_queue_free_lock);
3721
3722 for( color = 0; color < vm_colors; color++ ) {
3723 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3724 color, VM_PAGE_NULL, FALSE);
3725 }
3726 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3727 (unsigned int) -1,
3728 VM_PAGE_NULL, FALSE);
3729 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3730 panic("vm_page_verify_free_lists: "
3731 "npages %u free_count %d nlopages %u lo_free_count %u",
3732 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3733
3734 lck_mtx_unlock(&vm_page_queue_free_lock);
3735 }
3736
3737 void
3738 vm_page_queues_assert(
3739 vm_page_t mem,
3740 int val)
3741 {
3742 #if DEBUG
3743 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3744 #endif
3745 if (mem->free + mem->active + mem->inactive + mem->speculative +
3746 mem->throttled + mem->pageout_queue > (val)) {
3747 _vm_page_print(mem);
3748 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3749 }
3750 if (VM_PAGE_WIRED(mem)) {
3751 assert(!mem->active);
3752 assert(!mem->inactive);
3753 assert(!mem->speculative);
3754 assert(!mem->throttled);
3755 assert(!mem->pageout_queue);
3756 }
3757 }
3758 #endif /* MACH_ASSERT */
3759
3760
3761 /*
3762 * CONTIGUOUS PAGE ALLOCATION
3763 *
3764 * Find a region large enough to contain at least n pages
3765 * of contiguous physical memory.
3766 *
3767 * This is done by traversing the vm_page_t array in a linear fashion
3768 * we assume that the vm_page_t array has the available physical pages in an
3769 * ordered, ascending list... this is currently true of all our implementations
3770 * and must remain so... there can be 'holes' in the array... we also can
3771 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
3772 * which used to happen via 'vm_page_convert'... that function was no longer
3773 * being called and was removed...
3774 *
3775 * The basic flow consists of stabilizing some of the interesting state of
3776 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
3777 * sweep at the beginning of the array looking for pages that meet our criterea
3778 * for a 'stealable' page... currently we are pretty conservative... if the page
3779 * meets this criterea and is physically contiguous to the previous page in the 'run'
3780 * we keep developing it. If we hit a page that doesn't fit, we reset our state
3781 * and start to develop a new run... if at this point we've already considered
3782 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
3783 * and mutex_pause (which will yield the processor), to keep the latency low w/r
3784 * to other threads trying to acquire free pages (or move pages from q to q),
3785 * and then continue from the spot we left off... we only make 1 pass through the
3786 * array. Once we have a 'run' that is long enough, we'll go into the loop
3787 * which steals the pages from the queues they're currently on... pages on the free
3788 * queue can be stolen directly... pages that are on any of the other queues
3789 * must be removed from the object they are tabled on... this requires taking the
3790 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
3791 * or if the state of the page behind the vm_object lock is no longer viable, we'll
3792 * dump the pages we've currently stolen back to the free list, and pick up our
3793 * scan from the point where we aborted the 'current' run.
3794 *
3795 *
3796 * Requirements:
3797 * - neither vm_page_queue nor vm_free_list lock can be held on entry
3798 *
3799 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
3800 *
3801 * Algorithm:
3802 */
3803
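/*
 * Illustrative sketch only (not part of the original file): the run-detection
 * idea described above, reduced to a plain scan over an array of ascending
 * physical page numbers.  The function and variable names are hypothetical;
 * the real code below must additionally stabilize each page's state behind
 * the vm_page_queue and vm_page_free locks, yield periodically, and then
 * steal the pages that make up the run.
 */
#if 0 /* example only, not compiled */
static int
find_contig_run_example(const ppnum_t *pnums, unsigned int count,
	unsigned int want, ppnum_t align_mask)
{
	int		start = -1;
	unsigned int	npages = 0;
	unsigned int	i;
	ppnum_t		prev = 0;

	for (i = 0; i < count && npages < want; i++) {
		ppnum_t pnum = pnums[i];

		if (npages && pnum == prev + 1) {
			/* physically contiguous with the previous page: grow the run */
			npages++;
		} else if ((pnum & align_mask) == 0) {
			/* hole (or no run yet) but a suitably aligned start: new run */
			start = (int) i;
			npages = 1;
		} else {
			/* hole and a misaligned start: no run in progress */
			start = -1;
			npages = 0;
		}
		prev = pnum;
	}
	return (npages == want) ? start : -1;
}
#endif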
3804 #define MAX_CONSIDERED_BEFORE_YIELD 1000
3805
3806
3807 #define RESET_STATE_OF_RUN() \
3808 MACRO_BEGIN \
3809 prevcontaddr = -2; \
3810 start_pnum = -1; \
3811 free_considered = 0; \
3812 substitute_needed = 0; \
3813 npages = 0; \
3814 MACRO_END
3815
3816 /*
3817 * Can we steal in-use (i.e. not free) pages when searching for
3818 * physically-contiguous pages ?
3819 */
3820 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
3821
3822 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
3823 #if DEBUG
3824 int vm_page_find_contig_debug = 0;
3825 #endif
3826
3827 static vm_page_t
3828 vm_page_find_contiguous(
3829 unsigned int contig_pages,
3830 ppnum_t max_pnum,
3831 ppnum_t pnum_mask,
3832 boolean_t wire,
3833 int flags)
3834 {
3835 vm_page_t m = NULL;
3836 ppnum_t prevcontaddr;
3837 ppnum_t start_pnum;
3838 unsigned int npages, considered, scanned;
3839 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
3840 unsigned int idx_last_contig_page_found = 0;
3841 int free_considered, free_available;
3842 int substitute_needed;
3843 boolean_t wrapped;
3844 #if DEBUG
3845 clock_sec_t tv_start_sec, tv_end_sec;
3846 clock_usec_t tv_start_usec, tv_end_usec;
3847 #endif
3848 #if MACH_ASSERT
3849 int yielded = 0;
3850 int dumped_run = 0;
3851 int stolen_pages = 0;
3852 int compressed_pages = 0;
3853 #endif
3854
3855 if (contig_pages == 0)
3856 return VM_PAGE_NULL;
3857
3858 #if MACH_ASSERT
3859 vm_page_verify_free_lists();
3860 #endif
3861 #if DEBUG
3862 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
3863 #endif
3864 PAGE_REPLACEMENT_ALLOWED(TRUE);
3865
3866 vm_page_lock_queues();
3867 lck_mtx_lock(&vm_page_queue_free_lock);
3868
3869 RESET_STATE_OF_RUN();
3870
3871 scanned = 0;
3872 considered = 0;
3873 free_available = vm_page_free_count - vm_page_free_reserved;
3874
3875 wrapped = FALSE;
3876
3877 if(flags & KMA_LOMEM)
3878 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
3879 else
3880 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
3881
3882 orig_last_idx = idx_last_contig_page_found;
3883 last_idx = orig_last_idx;
3884
3885 for (page_idx = last_idx, start_idx = last_idx;
3886 npages < contig_pages && page_idx < vm_pages_count;
3887 page_idx++) {
3888 retry:
3889 if (wrapped &&
3890 npages == 0 &&
3891 page_idx >= orig_last_idx) {
3892 /*
3893 * We're back where we started and we haven't
3894 * found any suitable contiguous range. Let's
3895 * give up.
3896 */
3897 break;
3898 }
3899 scanned++;
3900 m = &vm_pages[page_idx];
3901
3902 assert(!m->fictitious);
3903 assert(!m->private);
3904
3905 if (max_pnum && m->phys_page > max_pnum) {
3906 /* no more low pages... */
3907 break;
3908 }
3909 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
3910 /*
3911 * not aligned
3912 */
3913 RESET_STATE_OF_RUN();
3914
3915 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
3916 m->encrypted_cleaning ||
3917 m->pageout_queue || m->laundry || m->wanted ||
3918 m->cleaning || m->overwriting || m->pageout) {
3919 /*
3920 * page is in a transient state
3921 * or a state we don't want to deal
3922 * with, so don't consider it which
3923 * means starting a new run
3924 */
3925 RESET_STATE_OF_RUN();
3926
3927 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
3928 /*
3929 * page needs to be on one of our queues
3930 * or it needs to belong to the compressor pool
3931 * in order for it to be stable behind the
3932 * locks we hold at this point...
3933 * if not, don't consider it which
3934 * means starting a new run
3935 */
3936 RESET_STATE_OF_RUN();
3937
3938 } else if (!m->free && (!m->tabled || m->busy)) {
3939 /*
3940 * pages on the free list are always 'busy'
3941 * so we couldn't test for 'busy' in the check
3942 * for the transient states... pages that are
3943 * 'free' are never 'tabled', so we also couldn't
3944 * test for 'tabled'. So we check here to make
3945 * sure that a non-free page is not busy and is
3946 * tabled on an object...
3947 * if not, don't consider it which
3948 * means starting a new run
3949 */
3950 RESET_STATE_OF_RUN();
3951
3952 } else {
3953 if (m->phys_page != prevcontaddr + 1) {
3954 if ((m->phys_page & pnum_mask) != 0) {
3955 RESET_STATE_OF_RUN();
3956 goto did_consider;
3957 } else {
3958 npages = 1;
3959 start_idx = page_idx;
3960 start_pnum = m->phys_page;
3961 }
3962 } else {
3963 npages++;
3964 }
3965 prevcontaddr = m->phys_page;
3966
3967 VM_PAGE_CHECK(m);
3968 if (m->free) {
3969 free_considered++;
3970 } else {
3971 /*
3972 * This page is not free.
3973 * If we can't steal used pages,
3974 * we have to give up this run
3975 * and keep looking.
3976 * Otherwise, we might need to
3977 * move the contents of this page
3978 * into a substitute page.
3979 */
3980 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
3981 if (m->pmapped || m->dirty || m->precious) {
3982 substitute_needed++;
3983 }
3984 #else
3985 RESET_STATE_OF_RUN();
3986 #endif
3987 }
3988
3989 if ((free_considered + substitute_needed) > free_available) {
3990 /*
3991 * if we let this run continue
3992 * we will end up dropping the vm_page_free_count
3993 * below the reserve limit... we need to abort
3994 * this run, but we can at least re-consider this
3995 * page... thus the jump back to 'retry'
3996 */
3997 RESET_STATE_OF_RUN();
3998
3999 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4000 considered++;
4001 goto retry;
4002 }
4003 /*
4004 * free_available == 0
4005 * so can't consider any free pages... if
4006 * we went to retry in this case, we'd
4007 * get stuck looking at the same page
4008 * w/o making any forward progress
4009 * we also want to take this path if we've already
4010 * reached our limit that controls the lock latency
4011 */
4012 }
4013 }
4014 did_consider:
4015 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
4016
4017 PAGE_REPLACEMENT_ALLOWED(FALSE);
4018
4019 lck_mtx_unlock(&vm_page_queue_free_lock);
4020 vm_page_unlock_queues();
4021
4022 mutex_pause(0);
4023
4024 PAGE_REPLACEMENT_ALLOWED(TRUE);
4025
4026 vm_page_lock_queues();
4027 lck_mtx_lock(&vm_page_queue_free_lock);
4028
4029 RESET_STATE_OF_RUN();
4030 /*
4031 * reset our free page limit since we
4032 * dropped the lock protecting the vm_page_free_queue
4033 */
4034 free_available = vm_page_free_count - vm_page_free_reserved;
4035 considered = 0;
4036 #if MACH_ASSERT
4037 yielded++;
4038 #endif
4039 goto retry;
4040 }
4041 considered++;
4042 }
4043 m = VM_PAGE_NULL;
4044
4045 if (npages != contig_pages) {
4046 if (!wrapped) {
4047 /*
4048 * We didn't find a contiguous range but we didn't
4049 * start from the very first page.
4050 * Start again from the very first page.
4051 */
4052 RESET_STATE_OF_RUN();
4053 if( flags & KMA_LOMEM)
4054 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4055 else
4056 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4057 last_idx = 0;
4058 page_idx = last_idx;
4059 wrapped = TRUE;
4060 goto retry;
4061 }
4062 lck_mtx_unlock(&vm_page_queue_free_lock);
4063 } else {
4064 vm_page_t m1;
4065 vm_page_t m2;
4066 unsigned int cur_idx;
4067 unsigned int tmp_start_idx;
4068 vm_object_t locked_object = VM_OBJECT_NULL;
4069 boolean_t abort_run = FALSE;
4070
4071 assert(page_idx - start_idx == contig_pages);
4072
4073 tmp_start_idx = start_idx;
4074
4075 /*
4076 * first pass through to pull the free pages
4077 * off of the free queue so that in case we
4078 * need substitute pages, we won't grab any
4079 * of the free pages in the run... the 'free' bit
4080 * is cleared right here in this pass (see below), and even in
4081 * an abort_run case, we'll collect all of the
4082 * free pages in this run and return them to the free list
4083 */
4084 while (start_idx < page_idx) {
4085
4086 m1 = &vm_pages[start_idx++];
4087
4088 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4089 assert(m1->free);
4090 #endif
4091
4092 if (m1->free) {
4093 unsigned int color;
4094
4095 color = m1->phys_page & vm_color_mask;
4096 #if MACH_ASSERT
4097 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
4098 #endif
4099 queue_remove(&vm_page_queue_free[color],
4100 m1,
4101 vm_page_t,
4102 pageq);
4103 m1->pageq.next = NULL;
4104 m1->pageq.prev = NULL;
4105 #if MACH_ASSERT
4106 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
4107 #endif
4108 /*
4109 * Clear the "free" bit so that this page
4110 * does not get considered for another
4111 * concurrent physically-contiguous allocation.
4112 */
4113 m1->free = FALSE;
4114 assert(m1->busy);
4115
4116 vm_page_free_count--;
4117 }
4118 }
4119 /*
4120 * adjust global freelist counts
4121 */
4122 if (vm_page_free_count < vm_page_free_count_minimum)
4123 vm_page_free_count_minimum = vm_page_free_count;
4124
4125 if( flags & KMA_LOMEM)
4126 vm_page_lomem_find_contiguous_last_idx = page_idx;
4127 else
4128 vm_page_find_contiguous_last_idx = page_idx;
4129
4130 /*
4131 * we can drop the free queue lock at this point since
4132 * we've pulled any 'free' candidates off of the list
4133 * we need it dropped so that we can do a vm_page_grab
4134 * when substituting for pmapped/dirty pages
4135 */
4136 lck_mtx_unlock(&vm_page_queue_free_lock);
4137
4138 start_idx = tmp_start_idx;
4139 cur_idx = page_idx - 1;
4140
4141 while (start_idx++ < page_idx) {
4142 /*
4143 * must go through the list from back to front
4144 * so that the page list is created in the
4145 * correct order - low -> high phys addresses
4146 */
4147 m1 = &vm_pages[cur_idx--];
4148
4149 assert(!m1->free);
4150
4151 if (m1->object == VM_OBJECT_NULL) {
4152 /*
4153 * page has already been removed from
4154 * the free list in the 1st pass
4155 */
4156 assert(m1->offset == (vm_object_offset_t) -1);
4157 assert(m1->busy);
4158 assert(!m1->wanted);
4159 assert(!m1->laundry);
4160 } else {
4161 vm_object_t object;
4162 int refmod;
4163 boolean_t disconnected, reusable;
4164
4165 if (abort_run == TRUE)
4166 continue;
4167
4168 object = m1->object;
4169
4170 if (object != locked_object) {
4171 if (locked_object) {
4172 vm_object_unlock(locked_object);
4173 locked_object = VM_OBJECT_NULL;
4174 }
4175 if (vm_object_lock_try(object))
4176 locked_object = object;
4177 }
4178 if (locked_object == VM_OBJECT_NULL ||
4179 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4180 m1->encrypted_cleaning ||
4181 m1->pageout_queue || m1->laundry || m1->wanted ||
4182 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
4183
4184 if (locked_object) {
4185 vm_object_unlock(locked_object);
4186 locked_object = VM_OBJECT_NULL;
4187 }
4188 tmp_start_idx = cur_idx;
4189 abort_run = TRUE;
4190 continue;
4191 }
4192
4193 disconnected = FALSE;
4194 reusable = FALSE;
4195
4196 if ((m1->reusable ||
4197 m1->object->all_reusable) &&
4198 m1->inactive &&
4199 !m1->dirty &&
4200 !m1->reference) {
4201 /* reusable page... */
4202 refmod = pmap_disconnect(m1->phys_page);
4203 disconnected = TRUE;
4204 if (refmod == 0) {
4205 /*
4206 * ... not reused: can steal
4207 * without relocating contents.
4208 */
4209 reusable = TRUE;
4210 }
4211 }
4212
4213 if ((m1->pmapped &&
4214 ! reusable) ||
4215 m1->dirty ||
4216 m1->precious) {
4217 vm_object_offset_t offset;
4218
4219 m2 = vm_page_grab();
4220
4221 if (m2 == VM_PAGE_NULL) {
4222 if (locked_object) {
4223 vm_object_unlock(locked_object);
4224 locked_object = VM_OBJECT_NULL;
4225 }
4226 tmp_start_idx = cur_idx;
4227 abort_run = TRUE;
4228 continue;
4229 }
4230 if (! disconnected) {
4231 if (m1->pmapped)
4232 refmod = pmap_disconnect(m1->phys_page);
4233 else
4234 refmod = 0;
4235 }
4236
4237 /* copy the page's contents */
4238 pmap_copy_page(m1->phys_page, m2->phys_page);
4239 /* copy the page's state */
4240 assert(!VM_PAGE_WIRED(m1));
4241 assert(!m1->free);
4242 assert(!m1->pageout_queue);
4243 assert(!m1->laundry);
4244 m2->reference = m1->reference;
4245 assert(!m1->gobbled);
4246 assert(!m1->private);
4247 m2->no_cache = m1->no_cache;
4248 m2->xpmapped = m1->xpmapped;
4249 assert(!m1->busy);
4250 assert(!m1->wanted);
4251 assert(!m1->fictitious);
4252 m2->pmapped = m1->pmapped; /* should flush cache ? */
4253 m2->wpmapped = m1->wpmapped;
4254 assert(!m1->pageout);
4255 m2->absent = m1->absent;
4256 m2->error = m1->error;
4257 m2->dirty = m1->dirty;
4258 assert(!m1->cleaning);
4259 m2->precious = m1->precious;
4260 m2->clustered = m1->clustered;
4261 assert(!m1->overwriting);
4262 m2->restart = m1->restart;
4263 m2->unusual = m1->unusual;
4264 m2->encrypted = m1->encrypted;
4265 assert(!m1->encrypted_cleaning);
4266 m2->cs_validated = m1->cs_validated;
4267 m2->cs_tainted = m1->cs_tainted;
4268
4269 /*
4270 * If m1 had really been reusable,
4271 * we would have just stolen it, so
4272 * let's not propagate its "reusable"
4273 * bit and assert that m2 is not
4274 * marked as "reusable".
4275 */
4276 // m2->reusable = m1->reusable;
4277 assert(!m2->reusable);
4278
4279 assert(!m1->lopage);
4280 m2->slid = m1->slid;
4281 m2->was_dirty = m1->was_dirty;
4282 m2->compressor = m1->compressor;
4283
4284 /*
4285 * make sure we clear the ref/mod state
4286 * from the pmap layer... else we risk
4287 * inheriting state from the last time
4288 * this page was used...
4289 */
4290 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4291
4292 if (refmod & VM_MEM_REFERENCED)
4293 m2->reference = TRUE;
4294 if (refmod & VM_MEM_MODIFIED) {
4295 SET_PAGE_DIRTY(m2, TRUE);
4296 }
4297 offset = m1->offset;
4298
4299 /*
4300 * completely cleans up the state
4301 * of the page so that it is ready
4302 * to be put onto the free list, or
4303 * for this purpose it looks like it
4304 * just came off of the free list
4305 */
4306 vm_page_free_prepare(m1);
4307
4308 /*
4309 * now put the substitute page
4310 * on the object
4311 */
4312 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4313
4314 if (m2->compressor) {
4315 m2->pmapped = TRUE;
4316 m2->wpmapped = TRUE;
4317
4318 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4319 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4320 #if MACH_ASSERT
4321 compressed_pages++;
4322 #endif
4323 } else {
4324 if (m2->reference)
4325 vm_page_activate(m2);
4326 else
4327 vm_page_deactivate(m2);
4328 }
4329 PAGE_WAKEUP_DONE(m2);
4330
4331 } else {
4332 assert(!m1->compressor);
4333
4334 /*
4335 * completely cleans up the state
4336 * of the page so that it is ready
4337 * to be put onto the free list, or
4338 * for this purpose it looks like it
4339 * just came off of the free list
4340 */
4341 vm_page_free_prepare(m1);
4342 }
4343 #if MACH_ASSERT
4344 stolen_pages++;
4345 #endif
4346 }
4347 m1->pageq.next = (queue_entry_t) m;
4348 m1->pageq.prev = NULL;
4349 m = m1;
4350 }
4351 if (locked_object) {
4352 vm_object_unlock(locked_object);
4353 locked_object = VM_OBJECT_NULL;
4354 }
4355
4356 if (abort_run == TRUE) {
4357 if (m != VM_PAGE_NULL) {
4358 vm_page_free_list(m, FALSE);
4359 }
4360 #if MACH_ASSERT
4361 dumped_run++;
4362 #endif
4363 /*
4364 * want the index of the last
4365 * page in this run that was
4366 * successfully 'stolen', so back
4367 * it up 1 for the auto-decrement on use
4368 * and 1 more to bump back over this page
4369 */
4370 page_idx = tmp_start_idx + 2;
4371 if (page_idx >= vm_pages_count) {
4372 if (wrapped)
4373 goto done_scanning;
4374 page_idx = last_idx = 0;
4375 wrapped = TRUE;
4376 }
4377 abort_run = FALSE;
4378
4379 /*
4380 * We had to abort this run; resume the scan
4381 * just past the page that caused the abort and
4382 * remember that index as the next starting point.
4383 */
4384 RESET_STATE_OF_RUN();
4385
4386 if( flags & KMA_LOMEM)
4387 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4388 else
4389 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4390
4391 last_idx = page_idx;
4392
4393 lck_mtx_lock(&vm_page_queue_free_lock);
4394 /*
4395 * reset our free page limit since we
4396 * dropped the lock protecting the vm_page_free_queue
4397 */
4398 free_available = vm_page_free_count - vm_page_free_reserved;
4399 goto retry;
4400 }
4401
4402 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4403
4404 if (wire == TRUE)
4405 m1->wire_count++;
4406 else
4407 m1->gobbled = TRUE;
4408 }
4409 if (wire == FALSE)
4410 vm_page_gobble_count += npages;
4411
4412 /*
4413 * gobbled pages are also counted as wired pages
4414 */
4415 vm_page_wire_count += npages;
4416
4417 assert(vm_page_verify_contiguous(m, npages));
4418 }
4419 done_scanning:
4420 PAGE_REPLACEMENT_ALLOWED(FALSE);
4421
4422 vm_page_unlock_queues();
4423
4424 #if DEBUG
4425 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4426
4427 tv_end_sec -= tv_start_sec;
4428 if (tv_end_usec < tv_start_usec) {
4429 tv_end_sec--;
4430 tv_end_usec += 1000000;
4431 }
4432 tv_end_usec -= tv_start_usec;
4433 if (tv_end_usec >= 1000000) {
4434 tv_end_sec++;
4435 tv_end_usec -= 1000000;
4436 }
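	/*
	 * Worked example of the elapsed-time normalization above (illustrative):
	 * start = 2s 900000us, end = 4s 100000us  =>  sec = 2, then borrow one
	 * second because 100000 < 900000 (sec = 1, usec = 1100000), and finally
	 * usec -= 900000 leaves 1s 200000us elapsed.
	 */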
4437 if (vm_page_find_contig_debug) {
4438 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4439 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4440 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4441 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
4442 }
4443
4444 #endif
4445 #if MACH_ASSERT
4446 vm_page_verify_free_lists();
4447 #endif
4448 return m;
4449 }
4450
4451 /*
4452 * Allocate a list of contiguous, wired pages.
4453 */
4454 kern_return_t
4455 cpm_allocate(
4456 vm_size_t size,
4457 vm_page_t *list,
4458 ppnum_t max_pnum,
4459 ppnum_t pnum_mask,
4460 boolean_t wire,
4461 int flags)
4462 {
4463 vm_page_t pages;
4464 unsigned int npages;
4465
4466 if (size % PAGE_SIZE != 0)
4467 return KERN_INVALID_ARGUMENT;
4468
4469 npages = (unsigned int) (size / PAGE_SIZE);
4470 if (npages != size / PAGE_SIZE) {
4471 /* 32-bit overflow */
4472 return KERN_INVALID_ARGUMENT;
4473 }
4474
4475 /*
4476 * Obtain a pointer to a subset of the free
4477 * list large enough to satisfy the request;
4478 * the region will be physically contiguous.
4479 */
4480 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4481
4482 if (pages == VM_PAGE_NULL)
4483 return KERN_NO_SPACE;
4484 /*
4485 * determine need for wakeups
4486 */
4487 if ((vm_page_free_count < vm_page_free_min) ||
4488 ((vm_page_free_count < vm_page_free_target) &&
4489 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4490 thread_wakeup((event_t) &vm_page_free_wanted);
4491
4492 VM_CHECK_MEMORYSTATUS;
4493
4494 /*
4495 * The CPM pages should now be available and
4496 * ordered by ascending physical address.
4497 */
4498 assert(vm_page_verify_contiguous(pages, npages));
4499
4500 *list = pages;
4501 return KERN_SUCCESS;
4502 }
4503
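/*
 * Illustrative caller sketch (not part of the original file): asking for
 * 16 wired, physically contiguous pages whose page numbers all lie below
 * 4GB and whose first page is 64KB-aligned (with 4KB pages, pnum_mask 0xF
 * forces the low four bits of the starting page number to zero).  The
 * function name is hypothetical; a real in-kernel caller such as
 * kmem_alloc_contig() would typically drive this path.
 */
#if 0 /* example only, not compiled */
static kern_return_t
example_cpm_request(vm_page_t *pages)	/* hypothetical */
{
	return cpm_allocate((vm_size_t)16 * PAGE_SIZE,
		pages,
		(ppnum_t)0x000FFFFF,	/* max_pnum: page numbers below 4GB */
		(ppnum_t)0xF,		/* pnum_mask: 64KB-aligned start */
		TRUE,			/* wire the pages */
		0);			/* flags: no KMA_LOMEM */
}
#endif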
4504
4505 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4506
4507 /*
4508 * when working on a 'run' of pages, it is necessary to hold
4509 * the vm_page_queue_lock (a hot global lock) for certain operations
4510 * on the page... however, the majority of the work can be done
4511 * while merely holding the object lock... in fact there are certain
4512 * collections of pages that don't require any work brokered by the
4513 * vm_page_queue_lock... to mitigate the time spent behind the global
4514 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4515 * while doing all of the work that doesn't require the vm_page_queue_lock...
4516 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4517 * necessary work for each page... we will grab the busy bit on the page
4518 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4519 * if it can't immediately take the vm_page_queue_lock in order to compete
4520 * for the locks in the same order that vm_pageout_scan takes them.
4521 * the operation names are modeled after the names of the routines that
4522 * need to be called in order to make the changes very obvious in the
4523 * original loop
4524 */
4525
4526 void
4527 vm_page_do_delayed_work(
4528 vm_object_t object,
4529 struct vm_page_delayed_work *dwp,
4530 int dw_count)
4531 {
4532 int j;
4533 vm_page_t m;
4534 vm_page_t local_free_q = VM_PAGE_NULL;
4535
4536 /*
4537 * pageout_scan takes the vm_page_lock_queues first
4538 * then tries for the object lock... to avoid what
4539 * is effectively a lock inversion, we'll go to the
4540 * trouble of taking them in that same order... otherwise
4541 * if this object contains the majority of the pages resident
4542 * in the UBC (or a small set of large objects actively being
4543 * worked on contain the majority of the pages), we could
4544 * cause the pageout_scan thread to 'starve' in its attempt
4545 * to find pages to move to the free queue, since it has to
4546 * successfully acquire the object lock of any candidate page
4547 * before it can steal/clean it.
4548 */
4549 if (!vm_page_trylockspin_queues()) {
4550 vm_object_unlock(object);
4551
4552 vm_page_lockspin_queues();
4553
4554 for (j = 0; ; j++) {
4555 if (!vm_object_lock_avoid(object) &&
4556 _vm_object_lock_try(object))
4557 break;
4558 vm_page_unlock_queues();
4559 mutex_pause(j);
4560 vm_page_lockspin_queues();
4561 }
4562 }
4563 for (j = 0; j < dw_count; j++, dwp++) {
4564
4565 m = dwp->dw_m;
4566
4567 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4568 vm_pageout_throttle_up(m);
4569
4570 if (dwp->dw_mask & DW_vm_page_wire)
4571 vm_page_wire(m);
4572 else if (dwp->dw_mask & DW_vm_page_unwire) {
4573 boolean_t queueit;
4574
4575 queueit = (dwp->dw_mask & DW_vm_page_free) ? FALSE : TRUE;
4576
4577 vm_page_unwire(m, queueit);
4578 }
4579 if (dwp->dw_mask & DW_vm_page_free) {
4580 vm_page_free_prepare_queues(m);
4581
4582 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4583 /*
4584 * Add this page to our list of reclaimed pages,
4585 * to be freed later.
4586 */
4587 m->pageq.next = (queue_entry_t) local_free_q;
4588 local_free_q = m;
4589 } else {
4590 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4591 vm_page_deactivate_internal(m, FALSE);
4592 else if (dwp->dw_mask & DW_vm_page_activate) {
4593 if (m->active == FALSE) {
4594 vm_page_activate(m);
4595 }
4596 }
4597 else if (dwp->dw_mask & DW_vm_page_speculate)
4598 vm_page_speculate(m, TRUE);
4599 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4600 /*
4601 * if we didn't hold the object lock and did this,
4602 * we might disconnect the page, then someone might
4603 * soft fault it back in, then we would put it on the
4604 * cleaned queue, and so we would have a referenced (maybe even dirty)
4605 * page on that queue, which we don't want
4606 */
4607 int refmod_state = pmap_disconnect(m->phys_page);
4608
4609 if ((refmod_state & VM_MEM_REFERENCED)) {
4610 /*
4611 * this page has been touched since it got cleaned; let's activate it
4612 * if it hasn't already been
4613 */
4614 vm_pageout_enqueued_cleaned++;
4615 vm_pageout_cleaned_reactivated++;
4616 vm_pageout_cleaned_commit_reactivated++;
4617
4618 if (m->active == FALSE)
4619 vm_page_activate(m);
4620 } else {
4621 m->reference = FALSE;
4622 vm_page_enqueue_cleaned(m);
4623 }
4624 }
4625 else if (dwp->dw_mask & DW_vm_page_lru)
4626 vm_page_lru(m);
4627 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4628 if ( !m->pageout_queue)
4629 VM_PAGE_QUEUES_REMOVE(m);
4630 }
4631 if (dwp->dw_mask & DW_set_reference)
4632 m->reference = TRUE;
4633 else if (dwp->dw_mask & DW_clear_reference)
4634 m->reference = FALSE;
4635
4636 if (dwp->dw_mask & DW_move_page) {
4637 if ( !m->pageout_queue) {
4638 VM_PAGE_QUEUES_REMOVE(m);
4639
4640 assert(m->object != kernel_object);
4641
4642 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4643 }
4644 }
4645 if (dwp->dw_mask & DW_clear_busy)
4646 m->busy = FALSE;
4647
4648 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4649 PAGE_WAKEUP(m);
4650 }
4651 }
4652 vm_page_unlock_queues();
4653
4654 if (local_free_q)
4655 vm_page_free_list(local_free_q, TRUE);
4656
4657 VM_CHECK_MEMORYSTATUS;
4658
4659 }
4660
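/*
 * Illustrative sketch (not part of the original file) of the two-pass
 * pattern described in the comment above vm_page_do_delayed_work():
 * gather per-page work while holding only the object lock, then broker
 * the queue-lock portion in batches.  The function name and the batch
 * size of 16 are hypothetical (the file's own tunable is
 * vm_max_delayed_work_limit); the pages are assumed to belong to 'object'.
 */
#if 0 /* example only, not compiled */
static void
example_batch_page_work(vm_object_t object, vm_page_t *page, int count)	/* hypothetical */
{
	struct vm_page_delayed_work	dw_array[16];
	int				dw_count = 0;
	int				i;

	vm_object_lock(object);

	for (i = 0; i < count; i++) {
		/* ...work that only needs the object lock on page[i] goes here... */

		dw_array[dw_count].dw_m = page[i];
		dw_array[dw_count].dw_mask = DW_vm_page_activate;

		if (++dw_count == 16) {
			/* queue-lock work for the whole batch is brokered in one call */
			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);

	vm_object_unlock(object);
}
#endif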
4661 kern_return_t
4662 vm_page_alloc_list(
4663 int page_count,
4664 int flags,
4665 vm_page_t *list)
4666 {
4667 vm_page_t lo_page_list = VM_PAGE_NULL;
4668 vm_page_t mem;
4669 int i;
4670
4671 if ( !(flags & KMA_LOMEM))
4672 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4673
4674 for (i = 0; i < page_count; i++) {
4675
4676 mem = vm_page_grablo();
4677
4678 if (mem == VM_PAGE_NULL) {
4679 if (lo_page_list)
4680 vm_page_free_list(lo_page_list, FALSE);
4681
4682 *list = VM_PAGE_NULL;
4683
4684 return (KERN_RESOURCE_SHORTAGE);
4685 }
4686 mem->pageq.next = (queue_entry_t) lo_page_list;
4687 lo_page_list = mem;
4688 }
4689 *list = lo_page_list;
4690
4691 return (KERN_SUCCESS);
4692 }
4693
4694 void
4695 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4696 {
4697 page->offset = offset;
4698 }
4699
4700 vm_page_t
4701 vm_page_get_next(vm_page_t page)
4702 {
4703 return ((vm_page_t) page->pageq.next);
4704 }
4705
4706 vm_object_offset_t
4707 vm_page_get_offset(vm_page_t page)
4708 {
4709 return (page->offset);
4710 }
4711
4712 ppnum_t
4713 vm_page_get_phys_page(vm_page_t page)
4714 {
4715 return (page->phys_page);
4716 }
4717
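/*
 * Illustrative sketch (not part of the original file): allocating a small
 * list of low-memory pages with vm_page_alloc_list() and walking it with
 * the accessors above.  The function name is hypothetical; freeing through
 * vm_page_free_list() mirrors the error path inside vm_page_alloc_list().
 */
#if 0 /* example only, not compiled */
static void
example_walk_lo_pages(void)	/* hypothetical */
{
	vm_page_t	list, p;

	if (vm_page_alloc_list(8, KMA_LOMEM, &list) != KERN_SUCCESS)
		return;

	for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p))
		printf("low page: phys 0x%x\n", vm_page_get_phys_page(p));

	vm_page_free_list(list, FALSE);
}
#endif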
4718
4719 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4720
4721 #if HIBERNATION
4722
4723 static vm_page_t hibernate_gobble_queue;
4724
4725 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4726
4727 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4728 static int hibernate_flush_dirty_pages(int);
4729 static int hibernate_flush_queue(queue_head_t *, int);
4730
4731 void hibernate_flush_wait(void);
4732 void hibernate_mark_in_progress(void);
4733 void hibernate_clear_in_progress(void);
4734
4735 void hibernate_free_range(int, int);
4736 void hibernate_hash_insert_page(vm_page_t);
4737 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
4738 void hibernate_rebuild_vm_structs(void);
4739 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
4740 ppnum_t hibernate_lookup_paddr(unsigned int);
4741
4742 struct hibernate_statistics {
4743 int hibernate_considered;
4744 int hibernate_reentered_on_q;
4745 int hibernate_found_dirty;
4746 int hibernate_skipped_cleaning;
4747 int hibernate_skipped_transient;
4748 int hibernate_skipped_precious;
4749 int hibernate_skipped_external;
4750 int hibernate_queue_nolock;
4751 int hibernate_queue_paused;
4752 int hibernate_throttled;
4753 int hibernate_throttle_timeout;
4754 int hibernate_drained;
4755 int hibernate_drain_timeout;
4756 int cd_lock_failed;
4757 int cd_found_precious;
4758 int cd_found_wired;
4759 int cd_found_busy;
4760 int cd_found_unusual;
4761 int cd_found_cleaning;
4762 int cd_found_laundry;
4763 int cd_found_dirty;
4764 int cd_found_xpmapped;
4765 int cd_local_free;
4766 int cd_total_free;
4767 int cd_vm_page_wire_count;
4768 int cd_vm_struct_pages_unneeded;
4769 int cd_pages;
4770 int cd_discarded;
4771 int cd_count_wire;
4772 } hibernate_stats;
4773
4774
4775
4776 static int
4777 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
4778 {
4779 wait_result_t wait_result;
4780
4781 vm_page_lock_queues();
4782
4783 while ( !queue_empty(&q->pgo_pending) ) {
4784
4785 q->pgo_draining = TRUE;
4786
4787 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
4788
4789 vm_page_unlock_queues();
4790
4791 wait_result = thread_block(THREAD_CONTINUE_NULL);
4792
4793 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
4794 hibernate_stats.hibernate_drain_timeout++;
4795
4796 if (q == &vm_pageout_queue_external)
4797 return (0);
4798
4799 return (1);
4800 }
4801 vm_page_lock_queues();
4802
4803 hibernate_stats.hibernate_drained++;
4804 }
4805 vm_page_unlock_queues();
4806
4807 return (0);
4808 }
4809
4810
4811 boolean_t hibernate_skip_external = FALSE;
4812
4813 static int
4814 hibernate_flush_queue(queue_head_t *q, int qcount)
4815 {
4816 vm_page_t m;
4817 vm_object_t l_object = NULL;
4818 vm_object_t m_object = NULL;
4819 int refmod_state = 0;
4820 int try_failed_count = 0;
4821 int retval = 0;
4822 int current_run = 0;
4823 struct vm_pageout_queue *iq;
4824 struct vm_pageout_queue *eq;
4825 struct vm_pageout_queue *tq;
4826
4827
4828 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
4829
4830 iq = &vm_pageout_queue_internal;
4831 eq = &vm_pageout_queue_external;
4832
4833 vm_page_lock_queues();
4834
4835 while (qcount && !queue_empty(q)) {
4836
4837 if (current_run++ == 1000) {
4838 if (hibernate_should_abort()) {
4839 retval = 1;
4840 break;
4841 }
4842 current_run = 0;
4843 }
4844
4845 m = (vm_page_t) queue_first(q);
4846 m_object = m->object;
4847
4848 /*
4849 * check to see if we currently are working
4850 * with the same object... if so, we've
4851 * already got the lock
4852 */
4853 if (m_object != l_object) {
4854 /*
4855 * the object associated with candidate page is
4856 * different from the one we were just working
4857 * with... dump the lock if we still own it
4858 */
4859 if (l_object != NULL) {
4860 vm_object_unlock(l_object);
4861 l_object = NULL;
4862 }
4863 /*
4864 * Try to lock object; since we've already got the
4865 * page queues lock, we can only 'try' for this one.
4866 * if the 'try' fails, we need to do a mutex_pause
4867 * to allow the owner of the object lock a chance to
4868 * run...
4869 */
4870 if ( !vm_object_lock_try_scan(m_object)) {
4871
4872 if (try_failed_count > 20) {
4873 hibernate_stats.hibernate_queue_nolock++;
4874
4875 goto reenter_pg_on_q;
4876 }
4877 vm_pageout_scan_wants_object = m_object;
4878
4879 vm_page_unlock_queues();
4880 mutex_pause(try_failed_count++);
4881 vm_page_lock_queues();
4882
4883 hibernate_stats.hibernate_queue_paused++;
4884 continue;
4885 } else {
4886 l_object = m_object;
4887 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4888 }
4889 }
4890 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
4891 /*
4892 * page is not to be cleaned
4893 * put it back on the head of its queue
4894 */
4895 if (m->cleaning)
4896 hibernate_stats.hibernate_skipped_cleaning++;
4897 else
4898 hibernate_stats.hibernate_skipped_transient++;
4899
4900 goto reenter_pg_on_q;
4901 }
4902 if (m_object->copy == VM_OBJECT_NULL) {
4903 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
4904 /*
4905 * let the normal hibernate image path
4906 * deal with these
4907 */
4908 goto reenter_pg_on_q;
4909 }
4910 }
4911 if ( !m->dirty && m->pmapped) {
4912 refmod_state = pmap_get_refmod(m->phys_page);
4913
4914 if ((refmod_state & VM_MEM_MODIFIED)) {
4915 SET_PAGE_DIRTY(m, FALSE);
4916 }
4917 } else
4918 refmod_state = 0;
4919
4920 if ( !m->dirty) {
4921 /*
4922 * page is not to be cleaned
4923 * put it back on the head of its queue
4924 */
4925 if (m->precious)
4926 hibernate_stats.hibernate_skipped_precious++;
4927
4928 goto reenter_pg_on_q;
4929 }
4930
4931 if (hibernate_skip_external == TRUE && !m_object->internal) {
4932
4933 hibernate_stats.hibernate_skipped_external++;
4934
4935 goto reenter_pg_on_q;
4936 }
4937 tq = NULL;
4938
4939 if (m_object->internal) {
4940 if (VM_PAGE_Q_THROTTLED(iq))
4941 tq = iq;
4942 } else if (VM_PAGE_Q_THROTTLED(eq))
4943 tq = eq;
4944
4945 if (tq != NULL) {
4946 wait_result_t wait_result;
4947 int wait_count = 5;
4948
4949 if (l_object != NULL) {
4950 vm_object_unlock(l_object);
4951 l_object = NULL;
4952 }
4953 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
4954
4955 while (retval == 0) {
4956
4957 tq->pgo_throttled = TRUE;
4958
4959 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
4960
4961 vm_page_unlock_queues();
4962
4963 wait_result = thread_block(THREAD_CONTINUE_NULL);
4964
4965 vm_page_lock_queues();
4966
4967 if (wait_result != THREAD_TIMED_OUT)
4968 break;
4969 if (!VM_PAGE_Q_THROTTLED(tq))
4970 break;
4971
4972 if (hibernate_should_abort())
4973 retval = 1;
4974
4975 if (--wait_count == 0) {
4976
4977 hibernate_stats.hibernate_throttle_timeout++;
4978
4979 if (tq == eq) {
4980 hibernate_skip_external = TRUE;
4981 break;
4982 }
4983 retval = 1;
4984 }
4985 }
4986 if (retval)
4987 break;
4988
4989 hibernate_stats.hibernate_throttled++;
4990
4991 continue;
4992 }
4993 /*
4994 * we've already factored out pages in the laundry which
4995 * means this page can't be on the pageout queue so it's
4996 * safe to do the VM_PAGE_QUEUES_REMOVE
4997 */
4998 assert(!m->pageout_queue);
4999
5000 VM_PAGE_QUEUES_REMOVE(m);
5001
5002 if (COMPRESSED_PAGER_IS_ACTIVE)
5003 pmap_disconnect(m->phys_page);
5004
5005 vm_pageout_cluster(m, FALSE);
5006
5007 hibernate_stats.hibernate_found_dirty++;
5008
5009 goto next_pg;
5010
5011 reenter_pg_on_q:
5012 queue_remove(q, m, vm_page_t, pageq);
5013 queue_enter(q, m, vm_page_t, pageq);
5014
5015 hibernate_stats.hibernate_reentered_on_q++;
5016 next_pg:
5017 hibernate_stats.hibernate_considered++;
5018
5019 qcount--;
5020 try_failed_count = 0;
5021 }
5022 if (l_object != NULL) {
5023 vm_object_unlock(l_object);
5024 l_object = NULL;
5025 }
5026 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5027
5028 vm_page_unlock_queues();
5029
5030 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5031
5032 return (retval);
5033 }
5034
5035
5036 static int
5037 hibernate_flush_dirty_pages(int pass)
5038 {
5039 struct vm_speculative_age_q *aq;
5040 uint32_t i;
5041
5042 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5043
5044 if (vm_page_local_q) {
5045 for (i = 0; i < vm_page_local_q_count; i++)
5046 vm_page_reactivate_local(i, TRUE, FALSE);
5047 }
5048
5049 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5050 int qcount;
5051 vm_page_t m;
5052
5053 aq = &vm_page_queue_speculative[i];
5054
5055 if (queue_empty(&aq->age_q))
5056 continue;
5057 qcount = 0;
5058
5059 vm_page_lockspin_queues();
5060
5061 queue_iterate(&aq->age_q,
5062 m,
5063 vm_page_t,
5064 pageq)
5065 {
5066 qcount++;
5067 }
5068 vm_page_unlock_queues();
5069
5070 if (qcount) {
5071 if (hibernate_flush_queue(&aq->age_q, qcount))
5072 return (1);
5073 }
5074 }
5075 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
5076 return (1);
5077 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5078 return (1);
5079 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
5080 return (1);
5081 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5082 return (1);
5083
5084 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5085 vm_compressor_record_warmup_start();
5086
5087 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5088 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5089 vm_compressor_record_warmup_end();
5090 return (1);
5091 }
5092 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5093 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5094 vm_compressor_record_warmup_end();
5095 return (1);
5096 }
5097 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5098 vm_compressor_record_warmup_end();
5099
5100 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5101 return (1);
5102
5103 return (0);
5104 }
5105
5106
5107 int
5108 hibernate_flush_memory()
5109 {
5110 int retval;
5111
5112 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5113
5114 hibernate_cleaning_in_progress = TRUE;
5115 hibernate_skip_external = FALSE;
5116
5117 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5118
5119 if (COMPRESSED_PAGER_IS_ACTIVE) {
5120
5121 if ((retval = hibernate_flush_dirty_pages(2)) == 0) {
5122
5123 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5124
5125 vm_compressor_flush();
5126
5127 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5128 }
5129 }
5130 if (retval == 0 && consider_buffer_cache_collect != NULL) {
5131 unsigned int orig_wire_count;
5132
5133 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5134 orig_wire_count = vm_page_wire_count;
5135
5136 (void)(*consider_buffer_cache_collect)(1);
5137 consider_zone_gc(TRUE);
5138
5139 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5140
5141 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
5142 }
5143 }
5144 hibernate_cleaning_in_progress = FALSE;
5145
5146 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5147
5148 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5149 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5150
5151
5152 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5153 hibernate_stats.hibernate_considered,
5154 hibernate_stats.hibernate_reentered_on_q,
5155 hibernate_stats.hibernate_found_dirty);
5156 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
5157 hibernate_stats.hibernate_skipped_cleaning,
5158 hibernate_stats.hibernate_skipped_transient,
5159 hibernate_stats.hibernate_skipped_precious,
5160 hibernate_stats.hibernate_skipped_external,
5161 hibernate_stats.hibernate_queue_nolock);
5162 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5163 hibernate_stats.hibernate_queue_paused,
5164 hibernate_stats.hibernate_throttled,
5165 hibernate_stats.hibernate_throttle_timeout,
5166 hibernate_stats.hibernate_drained,
5167 hibernate_stats.hibernate_drain_timeout);
5168
5169 return (retval);
5170 }
5171
5172
5173 static void
5174 hibernate_page_list_zero(hibernate_page_list_t *list)
5175 {
5176 uint32_t bank;
5177 hibernate_bitmap_t * bitmap;
5178
5179 bitmap = &list->bank_bitmap[0];
5180 for (bank = 0; bank < list->bank_count; bank++)
5181 {
5182 uint32_t last_bit;
5183
5184 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5185 // set out-of-bound bits at end of bitmap.
5186 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5187 if (last_bit)
5188 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
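	/*
	 * e.g. a bank spanning 40 pages uses two 32-bit bitmap words;
	 * last_bit = 40 & 31 = 8, so the 24 bit positions in the final
	 * word that correspond to no real page are pre-set and will
	 * never be treated as pages that need saving.
	 */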
5189
5190 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5191 }
5192 }
5193
5194 void
5195 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
5196 {
5197 uint32_t i;
5198 vm_page_t m;
5199 uint64_t start, end, timeout, nsec;
5200 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
5201 clock_get_uptime(&start);
5202
5203 for (i = 0; i < gobble_count; i++)
5204 {
5205 while (VM_PAGE_NULL == (m = vm_page_grab()))
5206 {
5207 clock_get_uptime(&end);
5208 if (end >= timeout)
5209 break;
5210 VM_PAGE_WAIT();
5211 }
5212 if (!m)
5213 break;
5214 m->busy = FALSE;
5215 vm_page_gobble(m);
5216
5217 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
5218 hibernate_gobble_queue = m;
5219 }
5220
5221 clock_get_uptime(&end);
5222 absolutetime_to_nanoseconds(end - start, &nsec);
5223 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
5224 }
5225
5226 void
5227 hibernate_free_gobble_pages(void)
5228 {
5229 vm_page_t m, next;
5230 uint32_t count = 0;
5231
5232 m = (vm_page_t) hibernate_gobble_queue;
5233 while(m)
5234 {
5235 next = (vm_page_t) m->pageq.next;
5236 vm_page_free(m);
5237 count++;
5238 m = next;
5239 }
5240 hibernate_gobble_queue = VM_PAGE_NULL;
5241
5242 if (count)
5243 HIBLOG("Freed %d pages\n", count);
5244 }
5245
5246 static boolean_t
5247 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
5248 {
5249 vm_object_t object = NULL;
5250 int refmod_state;
5251 boolean_t discard = FALSE;
5252
5253 do
5254 {
5255 if (m->private)
5256 panic("hibernate_consider_discard: private");
5257
5258 if (!vm_object_lock_try(m->object)) {
5259 if (!preflight) hibernate_stats.cd_lock_failed++;
5260 break;
5261 }
5262 object = m->object;
5263
5264 if (VM_PAGE_WIRED(m)) {
5265 if (!preflight) hibernate_stats.cd_found_wired++;
5266 break;
5267 }
5268 if (m->precious) {
5269 if (!preflight) hibernate_stats.cd_found_precious++;
5270 break;
5271 }
5272 if (m->busy || !object->alive) {
5273 /*
5274 * Somebody is playing with this page.
5275 */
5276 if (!preflight) hibernate_stats.cd_found_busy++;
5277 break;
5278 }
5279 if (m->absent || m->unusual || m->error) {
5280 /*
5281 * If it's unusual in any way, ignore it
5282 */
5283 if (!preflight) hibernate_stats.cd_found_unusual++;
5284 break;
5285 }
5286 if (m->cleaning) {
5287 if (!preflight) hibernate_stats.cd_found_cleaning++;
5288 break;
5289 }
5290 if (m->laundry) {
5291 if (!preflight) hibernate_stats.cd_found_laundry++;
5292 break;
5293 }
5294 if (!m->dirty)
5295 {
5296 refmod_state = pmap_get_refmod(m->phys_page);
5297
5298 if (refmod_state & VM_MEM_REFERENCED)
5299 m->reference = TRUE;
5300 if (refmod_state & VM_MEM_MODIFIED) {
5301 SET_PAGE_DIRTY(m, FALSE);
5302 }
5303 }
5304
5305 /*
5306 * If it's clean or purgeable we can discard the page on wakeup.
5307 */
5308 discard = (!m->dirty)
5309 || (VM_PURGABLE_VOLATILE == object->purgable)
5310 || (VM_PURGABLE_EMPTY == object->purgable);
5311
5312
5313 if (discard == FALSE) {
5314 if (!preflight)
5315 hibernate_stats.cd_found_dirty++;
5316 } else if (m->xpmapped && m->reference) {
5317 if (!preflight)
5318 hibernate_stats.cd_found_xpmapped++;
5319 discard = FALSE;
5320 }
5321 }
5322 while (FALSE);
5323
5324 if (object)
5325 vm_object_unlock(object);
5326
5327 return (discard);
5328 }
5329
5330
5331 static void
5332 hibernate_discard_page(vm_page_t m)
5333 {
5334 if (m->absent || m->unusual || m->error)
5335 /*
5336 * If it's unusual in any way, ignore
5337 */
5338 return;
5339
5340 #if DEBUG
5341 vm_object_t object = m->object;
5342 if (!vm_object_lock_try(m->object))
5343 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5344 #else
5345 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5346 makes sure these locks are uncontended before sleep */
5347 #endif /* !DEBUG */
5348
5349 if (m->pmapped == TRUE)
5350 {
5351 __unused int refmod_state = pmap_disconnect(m->phys_page);
5352 }
5353
5354 if (m->laundry)
5355 panic("hibernate_discard_page(%p) laundry", m);
5356 if (m->private)
5357 panic("hibernate_discard_page(%p) private", m);
5358 if (m->fictitious)
5359 panic("hibernate_discard_page(%p) fictitious", m);
5360
5361 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5362 {
5363 /* object should be on a queue */
5364 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5365 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5366 assert(old_queue);
5367 if (m->object->purgeable_when_ripe) {
5368 vm_purgeable_token_delete_first(old_queue);
5369 }
5370 m->object->purgable = VM_PURGABLE_EMPTY;
5371 }
5372
5373 vm_page_free(m);
5374
5375 #if DEBUG
5376 vm_object_unlock(object);
5377 #endif /* DEBUG */
5378 }
5379
5380 /*
5381 Grab locks for hibernate_page_list_setall()
5382 */
5383 void
5384 hibernate_vm_lock_queues(void)
5385 {
5386 vm_object_lock(compressor_object);
5387 vm_page_lock_queues();
5388 lck_mtx_lock(&vm_page_queue_free_lock);
5389
5390 if (vm_page_local_q) {
5391 uint32_t i;
5392 for (i = 0; i < vm_page_local_q_count; i++) {
5393 struct vpl *lq;
5394 lq = &vm_page_local_q[i].vpl_un.vpl;
5395 VPL_LOCK(&lq->vpl_lock);
5396 }
5397 }
5398 }
5399
5400 void
5401 hibernate_vm_unlock_queues(void)
5402 {
5403 if (vm_page_local_q) {
5404 uint32_t i;
5405 for (i = 0; i < vm_page_local_q_count; i++) {
5406 struct vpl *lq;
5407 lq = &vm_page_local_q[i].vpl_un.vpl;
5408 VPL_UNLOCK(&lq->vpl_lock);
5409 }
5410 }
5411 lck_mtx_unlock(&vm_page_queue_free_lock);
5412 vm_page_unlock_queues();
5413 vm_object_unlock(compressor_object);
5414 }
5415
5416 /*
5417 A zero bit in the bitmaps means the page needs to be saved.  All pages default to being saved;
5418 pages known to the VM not to need saving are then subtracted (their bits are set).
5419 Wired pages that must be saved are tracked in page_list_wired, pageable ones in page_list.
5420 */
5421
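/*
 * Note: throughout the routine below, hibernate_page_bitset(list, TRUE, pnum)
 * sets the page's bit, i.e. marks that page as NOT needing to be preserved in
 * that list.  Free pages are therefore set in both lists, while a resident
 * pageable page that must be saved keeps its zero bit in page_list.
 */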
5422 void
5423 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5424 hibernate_page_list_t * page_list_wired,
5425 hibernate_page_list_t * page_list_pal,
5426 boolean_t preflight,
5427 boolean_t will_discard,
5428 uint32_t * pagesOut)
5429 {
5430 uint64_t start, end, nsec;
5431 vm_page_t m;
5432 vm_page_t next;
5433 uint32_t pages = page_list->page_count;
5434 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
5435 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5436 uint32_t count_wire = pages;
5437 uint32_t count_discard_active = 0;
5438 uint32_t count_discard_inactive = 0;
5439 uint32_t count_discard_cleaned = 0;
5440 uint32_t count_discard_purgeable = 0;
5441 uint32_t count_discard_speculative = 0;
5442 uint32_t count_discard_vm_struct_pages = 0;
5443 uint32_t i;
5444 uint32_t bank;
5445 hibernate_bitmap_t * bitmap;
5446 hibernate_bitmap_t * bitmap_wired;
5447 boolean_t discard_all;
5448 boolean_t discard;
5449
5450 HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
5451
5452 if (preflight) {
5453 page_list = NULL;
5454 page_list_wired = NULL;
5455 page_list_pal = NULL;
5456 discard_all = FALSE;
5457 } else {
5458 discard_all = will_discard;
5459 }
5460
5461 #if DEBUG
5462 if (!preflight)
5463 {
5464 vm_page_lock_queues();
5465 if (vm_page_local_q) {
5466 for (i = 0; i < vm_page_local_q_count; i++) {
5467 struct vpl *lq;
5468 lq = &vm_page_local_q[i].vpl_un.vpl;
5469 VPL_LOCK(&lq->vpl_lock);
5470 }
5471 }
5472 }
5473 #endif /* DEBUG */
5474
5475
5476 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5477
5478 clock_get_uptime(&start);
5479
5480 if (!preflight) {
5481 hibernate_page_list_zero(page_list);
5482 hibernate_page_list_zero(page_list_wired);
5483 hibernate_page_list_zero(page_list_pal);
5484
5485 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5486 hibernate_stats.cd_pages = pages;
5487 }
5488
5489 if (vm_page_local_q) {
5490 for (i = 0; i < vm_page_local_q_count; i++)
5491 vm_page_reactivate_local(i, TRUE, !preflight);
5492 }
5493
5494 if (preflight) {
5495 vm_object_lock(compressor_object);
5496 vm_page_lock_queues();
5497 lck_mtx_lock(&vm_page_queue_free_lock);
5498 }
5499
5500 m = (vm_page_t) hibernate_gobble_queue;
5501 while (m)
5502 {
5503 pages--;
5504 count_wire--;
5505 if (!preflight) {
5506 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5507 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5508 }
5509 m = (vm_page_t) m->pageq.next;
5510 }
5511
5512 if (!preflight) for( i = 0; i < real_ncpus; i++ )
5513 {
5514 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5515 {
5516 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5517 {
5518 pages--;
5519 count_wire--;
5520 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5521 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5522
5523 hibernate_stats.cd_local_free++;
5524 hibernate_stats.cd_total_free++;
5525 }
5526 }
5527 }
5528
5529 for( i = 0; i < vm_colors; i++ )
5530 {
5531 queue_iterate(&vm_page_queue_free[i],
5532 m,
5533 vm_page_t,
5534 pageq)
5535 {
5536 pages--;
5537 count_wire--;
5538 if (!preflight) {
5539 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5540 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5541
5542 hibernate_stats.cd_total_free++;
5543 }
5544 }
5545 }
5546
5547 queue_iterate(&vm_lopage_queue_free,
5548 m,
5549 vm_page_t,
5550 pageq)
5551 {
5552 pages--;
5553 count_wire--;
5554 if (!preflight) {
5555 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5556 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5557
5558 hibernate_stats.cd_total_free++;
5559 }
5560 }
5561
5562 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5563 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
5564 {
5565 next = (vm_page_t) m->pageq.next;
5566 discard = FALSE;
5567 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5568 && hibernate_consider_discard(m, preflight))
5569 {
5570 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5571 count_discard_inactive++;
5572 discard = discard_all;
5573 }
5574 else
5575 count_throttled++;
5576 count_wire--;
5577 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5578
5579 if (discard) hibernate_discard_page(m);
5580 m = next;
5581 }
5582
5583 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5584 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5585 {
5586 next = (vm_page_t) m->pageq.next;
5587 discard = FALSE;
5588 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5589 && hibernate_consider_discard(m, preflight))
5590 {
5591 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5592 if (m->dirty)
5593 count_discard_purgeable++;
5594 else
5595 count_discard_inactive++;
5596 discard = discard_all;
5597 }
5598 else
5599 count_anonymous++;
5600 count_wire--;
5601 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5602 if (discard) hibernate_discard_page(m);
5603 m = next;
5604 }
5605
5606 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5607 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5608 {
5609 next = (vm_page_t) m->pageq.next;
5610 discard = FALSE;
5611 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5612 && hibernate_consider_discard(m, preflight))
5613 {
5614 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5615 if (m->dirty)
5616 count_discard_purgeable++;
5617 else
5618 count_discard_inactive++;
5619 discard = discard_all;
5620 }
5621 else
5622 count_inactive++;
5623 count_wire--;
5624 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5625 if (discard) hibernate_discard_page(m);
5626 m = next;
5627 }
5628
5629 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5630 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5631 {
5632 next = (vm_page_t) m->pageq.next;
5633 discard = FALSE;
5634 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5635 && hibernate_consider_discard(m, preflight))
5636 {
5637 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5638 if (m->dirty)
5639 count_discard_purgeable++;
5640 else
5641 count_discard_cleaned++;
5642 discard = discard_all;
5643 }
5644 else
5645 count_cleaned++;
5646 count_wire--;
5647 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5648 if (discard) hibernate_discard_page(m);
5649 m = next;
5650 }
5651
5652 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5653 {
5654 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5655 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5656 {
5657 next = (vm_page_t) m->pageq.next;
5658 discard = FALSE;
5659 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5660 && hibernate_consider_discard(m, preflight))
5661 {
5662 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5663 count_discard_speculative++;
5664 discard = discard_all;
5665 }
5666 else
5667 count_speculative++;
5668 count_wire--;
5669 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5670 if (discard) hibernate_discard_page(m);
5671 m = next;
5672 }
5673 }
5674
5675 m = (vm_page_t) queue_first(&vm_page_queue_active);
5676 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5677 {
5678 next = (vm_page_t) m->pageq.next;
5679 discard = FALSE;
5680 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5681 && hibernate_consider_discard(m, preflight))
5682 {
5683 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5684 if (m->dirty)
5685 count_discard_purgeable++;
5686 else
5687 count_discard_active++;
5688 discard = discard_all;
5689 }
5690 else
5691 count_active++;
5692 count_wire--;
5693 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5694 if (discard) hibernate_discard_page(m);
5695 m = next;
5696 }
5697
5698 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
5699 {
5700 count_compressor++;
5701 count_wire--;
5702 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5703 }
5704
5705 if (preflight == FALSE && discard_all == TRUE) {
5706 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5707
5708 HIBLOG("hibernate_teardown started\n");
5709 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
5710 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
5711
5712 pages -= count_discard_vm_struct_pages;
5713 count_wire -= count_discard_vm_struct_pages;
5714
5715 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
5716
5717 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
5718 }
5719
5720 if (!preflight) {
5721 // pull wired from hibernate_bitmap
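		/*
		 * A page whose bit is zero (i.e. "must be saved") in
		 * page_list_wired has its bit set here in page_list, so
		 * page_list ends up describing only the pageable pages
		 * to be written and page_list_wired only the wired ones.
		 */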
5722 bitmap = &page_list->bank_bitmap[0];
5723 bitmap_wired = &page_list_wired->bank_bitmap[0];
5724 for (bank = 0; bank < page_list->bank_count; bank++)
5725 {
5726 for (i = 0; i < bitmap->bitmapwords; i++)
5727 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5728 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5729 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5730 }
5731 }
5732
5733 // machine dependent adjustments
5734 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
5735
5736 if (!preflight) {
5737 hibernate_stats.cd_count_wire = count_wire;
5738 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
5739 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
5740 }
5741
5742 clock_get_uptime(&end);
5743 absolutetime_to_nanoseconds(end - start, &nsec);
5744 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
5745
5746 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
5747 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
5748 discard_all ? "did" : "could",
5749 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5750
5751 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
5752
5753 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
5754
5755 #if DEBUG
5756 if (!preflight)
5757 {
5758 if (vm_page_local_q) {
5759 for (i = 0; i < vm_page_local_q_count; i++) {
5760 struct vpl *lq;
5761 lq = &vm_page_local_q[i].vpl_un.vpl;
5762 VPL_UNLOCK(&lq->vpl_lock);
5763 }
5764 }
5765 vm_page_unlock_queues();
5766 }
5767 #endif /* DEBUG */
5768
5769 if (preflight) {
5770 lck_mtx_unlock(&vm_page_queue_free_lock);
5771 vm_page_unlock_queues();
5772 vm_object_unlock(compressor_object);
5773 }
5774
5775 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
5776 }
5777
5778 void
5779 hibernate_page_list_discard(hibernate_page_list_t * page_list)
5780 {
5781 uint64_t start, end, nsec;
5782 vm_page_t m;
5783 vm_page_t next;
5784 uint32_t i;
5785 uint32_t count_discard_active = 0;
5786 uint32_t count_discard_inactive = 0;
5787 uint32_t count_discard_purgeable = 0;
5788 uint32_t count_discard_cleaned = 0;
5789 uint32_t count_discard_speculative = 0;
5790
5791
5792 #if DEBUG
5793 vm_page_lock_queues();
5794 if (vm_page_local_q) {
5795 for (i = 0; i < vm_page_local_q_count; i++) {
5796 struct vpl *lq;
5797 lq = &vm_page_local_q[i].vpl_un.vpl;
5798 VPL_LOCK(&lq->vpl_lock);
5799 }
5800 }
5801 #endif /* DEBUG */
5802
5803 clock_get_uptime(&start);
5804
5805 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5806 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5807 {
5808 next = (vm_page_t) m->pageq.next;
5809 if (hibernate_page_bittst(page_list, m->phys_page))
5810 {
5811 if (m->dirty)
5812 count_discard_purgeable++;
5813 else
5814 count_discard_inactive++;
5815 hibernate_discard_page(m);
5816 }
5817 m = next;
5818 }
5819
5820 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5821 {
5822 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5823 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5824 {
5825 next = (vm_page_t) m->pageq.next;
5826 if (hibernate_page_bittst(page_list, m->phys_page))
5827 {
5828 count_discard_speculative++;
5829 hibernate_discard_page(m);
5830 }
5831 m = next;
5832 }
5833 }
5834
5835 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5836 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5837 {
5838 next = (vm_page_t) m->pageq.next;
5839 if (hibernate_page_bittst(page_list, m->phys_page))
5840 {
5841 if (m->dirty)
5842 count_discard_purgeable++;
5843 else
5844 count_discard_inactive++;
5845 hibernate_discard_page(m);
5846 }
5847 m = next;
5848 }
5849
5850 m = (vm_page_t) queue_first(&vm_page_queue_active);
5851 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5852 {
5853 next = (vm_page_t) m->pageq.next;
5854 if (hibernate_page_bittst(page_list, m->phys_page))
5855 {
5856 if (m->dirty)
5857 count_discard_purgeable++;
5858 else
5859 count_discard_active++;
5860 hibernate_discard_page(m);
5861 }
5862 m = next;
5863 }
5864
5865 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5866 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5867 {
5868 next = (vm_page_t) m->pageq.next;
5869 if (hibernate_page_bittst(page_list, m->phys_page))
5870 {
5871 if (m->dirty)
5872 count_discard_purgeable++;
5873 else
5874 count_discard_cleaned++;
5875 hibernate_discard_page(m);
5876 }
5877 m = next;
5878 }
5879
5880 #if DEBUG
5881 if (vm_page_local_q) {
5882 for (i = 0; i < vm_page_local_q_count; i++) {
5883 struct vpl *lq;
5884 lq = &vm_page_local_q[i].vpl_un.vpl;
5885 VPL_UNLOCK(&lq->vpl_lock);
5886 }
5887 }
5888 vm_page_unlock_queues();
5889 #endif /* DEBUG */
5890
5891 clock_get_uptime(&end);
5892 absolutetime_to_nanoseconds(end - start, &nsec);
5893 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
5894 nsec / 1000000ULL,
5895 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
5896 }
5897
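/*
 * State shared by the hibernation teardown/rebuild path: whether a rebuild
 * is still pending, the highest valid index in the compacted vm_pages array,
 * a list of hash-tabled pages that live outside vm_pages[], and counters
 * used for sanity checks and logging.
 */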
5898 boolean_t hibernate_paddr_map_inited = FALSE;
5899 boolean_t hibernate_rebuild_needed = FALSE;
5900 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
5901 vm_page_t hibernate_rebuild_hash_list = NULL;
5902
5903 unsigned int hibernate_teardown_found_tabled_pages = 0;
5904 unsigned int hibernate_teardown_found_created_pages = 0;
5905 unsigned int hibernate_teardown_found_free_pages = 0;
5906 unsigned int hibernate_teardown_vm_page_free_count;
5907
5908
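/*
 * A ppnum_mapping describes a run of vm_pages[] entries
 * [ppnm_sindx, ppnm_eindx) whose physical pages are contiguous, starting
 * at ppnm_base_paddr.  The runs are chained through ppnm_next and let
 * hibernate_lookup_paddr() recover a page's physical page number from its
 * index into vm_pages[].
 */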
5909 struct ppnum_mapping {
5910 struct ppnum_mapping *ppnm_next;
5911 ppnum_t ppnm_base_paddr;
5912 unsigned int ppnm_sindx;
5913 unsigned int ppnm_eindx;
5914 };
5915
5916 struct ppnum_mapping *ppnm_head;
5917 struct ppnum_mapping *ppnm_last_found = NULL;
5918
5919
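/*
 *	Routine:	hibernate_create_paddr_map
 *	Purpose:
 *		One-time scan of vm_pages[] that records each run of physically
 *		contiguous pages as a ppnum_mapping, building the list consumed
 *		by hibernate_lookup_paddr().
 */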
5920 void
5921 hibernate_create_paddr_map()
5922 {
5923 unsigned int i;
5924 ppnum_t next_ppnum_in_run = 0;
5925 struct ppnum_mapping *ppnm = NULL;
5926
5927 if (hibernate_paddr_map_inited == FALSE) {
5928
5929 for (i = 0; i < vm_pages_count; i++) {
5930
5931 if (ppnm)
5932 ppnm->ppnm_eindx = i;
5933
5934 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
5935
5936 ppnm = kalloc(sizeof(struct ppnum_mapping));
5937
5938 ppnm->ppnm_next = ppnm_head;
5939 ppnm_head = ppnm;
5940
5941 ppnm->ppnm_sindx = i;
5942 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
5943 }
5944 next_ppnum_in_run = vm_pages[i].phys_page + 1;
5945 }
5946 ppnm->ppnm_eindx++;
5947
5948 hibernate_paddr_map_inited = TRUE;
5949 }
5950 }
5951
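/*
 *	Routine:	hibernate_lookup_paddr
 *	Purpose:
 *		Translate an index into vm_pages[] back to the physical page
 *		number of that entry, using the ppnum_mapping runs.  The last
 *		matching run is cached in ppnm_last_found, since callers tend
 *		to walk indices sequentially.  For example (illustrative
 *		numbers only): in a run with ppnm_sindx == 100 and
 *		ppnm_base_paddr == 0x2000, index 103 yields ppnum 0x2003.
 *		Panics if the index is not covered by any run.
 */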
5952 ppnum_t
5953 hibernate_lookup_paddr(unsigned int indx)
5954 {
5955 struct ppnum_mapping *ppnm = NULL;
5956
5957 ppnm = ppnm_last_found;
5958
5959 if (ppnm) {
5960 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
5961 goto done;
5962 }
5963 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
5964
5965 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
5966 ppnm_last_found = ppnm;
5967 break;
5968 }
5969 }
5970 if (ppnm == NULL)
5971 panic("hibernate_lookup_paddr of %d failed\n", indx);
5972 done:
5973 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
5974 }
5975
5976
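/*
 *	Routine:	hibernate_mark_as_unneeded
 *	Purpose:
 *		Set the bits for the physical pages backing the kernel virtual
 *		range [saddr, eaddr) in both page lists, marking them as
 *		unneeded for the image.  saddr is rounded up and eaddr rounded
 *		down to page boundaries, so only pages entirely inside the
 *		range are marked.  Returns the number of pages marked.
 */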
5977 uint32_t
5978 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
5979 {
5980 addr64_t saddr_aligned;
5981 addr64_t eaddr_aligned;
5982 addr64_t addr;
5983 ppnum_t paddr;
5984 unsigned int mark_as_unneeded_pages = 0;
5985
5986 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
5987 eaddr_aligned = eaddr & ~PAGE_MASK_64;
5988
5989 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
5990
5991 paddr = pmap_find_phys(kernel_pmap, addr);
5992
5993 assert(paddr);
5994
5995 hibernate_page_bitset(page_list, TRUE, paddr);
5996 hibernate_page_bitset(page_list_wired, TRUE, paddr);
5997
5998 mark_as_unneeded_pages++;
5999 }
6000 return (mark_as_unneeded_pages);
6001 }
6002
6003
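/*
 *	Routine:	hibernate_hash_insert_page
 *	Purpose:
 *		Re-link a tabled page at the head of its vm_object/offset hash
 *		bucket.  Unlike vm_page_insert(), no bucket lock is taken here,
 *		which presumes the rebuild path runs before normal paging
 *		activity resumes.
 */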
6004 void
6005 hibernate_hash_insert_page(vm_page_t mem)
6006 {
6007 vm_page_bucket_t *bucket;
6008 int hash_id;
6009
6010 assert(mem->tabled);
6011 assert(mem->object);
6012 assert(mem->offset != (vm_object_offset_t) -1);
6013
6014 /*
6015 * Insert it into the object/offset hash table
6016 */
6017 hash_id = vm_page_hash(mem->object, mem->offset);
6018 bucket = &vm_page_buckets[hash_id];
6019
6020 mem->next = bucket->pages;
6021 bucket->pages = mem;
6022 }
6023
6024
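/*
 *	Routine:	hibernate_free_range
 *	Purpose:
 *		Reinitialize vm_pages[sindx..eindx) as free pages: each entry
 *		is re-initialized with its physical page number (recovered via
 *		hibernate_lookup_paddr()), placed at the head of the free queue
 *		for its color, and counted in vm_page_free_count.
 */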
6025 void
6026 hibernate_free_range(int sindx, int eindx)
6027 {
6028 vm_page_t mem;
6029 unsigned int color;
6030
6031 while (sindx < eindx) {
6032 mem = &vm_pages[sindx];
6033
6034 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6035
6036 mem->lopage = FALSE;
6037 mem->free = TRUE;
6038
6039 color = mem->phys_page & vm_color_mask;
6040 queue_enter_first(&vm_page_queue_free[color],
6041 mem,
6042 vm_page_t,
6043 pageq);
6044 vm_page_free_count++;
6045
6046 sindx++;
6047 }
6048 }
6049
6050
6051 extern void hibernate_rebuild_pmap_structs(void);
6052
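/*
 *	Routine:	hibernate_rebuild_vm_structs
 *	Purpose:
 *		Undo hibernate_teardown_vm_structs() after wake.  Rebuilds the
 *		pmap structures, then walks the compacted vm_pages[] entries
 *		from the highest valid index down, moves each displaced
 *		vm_page_t back to the original slot recorded in its "next"
 *		field, re-hashes tabled pages, and turns the holes in between
 *		back into ranges of free pages.  Finally the fictitious pages
 *		saved on hibernate_rebuild_hash_list are re-inserted into the
 *		hash.
 */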
6053 void
6054 hibernate_rebuild_vm_structs(void)
6055 {
6056 int cindx, sindx, eindx;
6057 vm_page_t mem, tmem, mem_next;
6058 AbsoluteTime startTime, endTime;
6059 uint64_t nsec;
6060
6061 if (hibernate_rebuild_needed == FALSE)
6062 return;
6063
6064 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6065 HIBLOG("hibernate_rebuild started\n");
6066
6067 clock_get_uptime(&startTime);
6068
6069 hibernate_rebuild_pmap_structs();
6070
6071 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6072 eindx = vm_pages_count;
6073
6074 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6075
6076 mem = &vm_pages[cindx];
6077 /*
6078 * hibernate_teardown_vm_structs left a pointer to this
6079 * vm_page_t's original slot in "next".
6080 */
6081 tmem = mem->next;
6082 mem->next = NULL;
6083
6084 sindx = (int)(tmem - &vm_pages[0]);
6085
6086 if (mem != tmem) {
6087 /*
6088 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6089 * so move it back to its real location
6090 */
6091 *tmem = *mem;
6092 mem = tmem;
6093 }
6094 if (mem->tabled)
6095 hibernate_hash_insert_page(mem);
6096 /*
6097 * the 'hole' between this vm_page_t and the previous
6098 * vm_page_t we moved needs to be initialized as
6099 * a range of free vm_page_t's
6100 */
6101 hibernate_free_range(sindx + 1, eindx);
6102
6103 eindx = sindx;
6104 }
6105 if (sindx)
6106 hibernate_free_range(0, sindx);
6107
6108 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6109
6110 /*
6111 * process the list of vm_page_t's that were tabled in the hash,
6112 * but were not located in the vm_pages array... these are
6113 * vm_page_t's that were created on the fly (i.e. fictitious)
6114 */
6115 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6116 mem_next = mem->next;
6117
6118 mem->next = NULL;
6119 hibernate_hash_insert_page(mem);
6120 }
6121 hibernate_rebuild_hash_list = NULL;
6122
6123 clock_get_uptime(&endTime);
6124 SUB_ABSOLUTETIME(&endTime, &startTime);
6125 absolutetime_to_nanoseconds(endTime, &nsec);
6126
6127 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6128
6129 hibernate_rebuild_needed = FALSE;
6130
6131 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6132 }
6133
6134
6135 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6136
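/*
 *	Routine:	hibernate_teardown_vm_structs
 *	Purpose:
 *		Compact vm_pages[] before the image is written: free entries
 *		are pulled off their free queues and overwritten by later
 *		in-use entries (each of which records its original slot in
 *		"next"), so that only entries up to
 *		hibernate_teardown_last_valid_compact_indx need to be
 *		preserved.  Hash-tabled pages that live outside vm_pages[] are
 *		stashed on hibernate_rebuild_hash_list.  The page hash buckets,
 *		the tail of vm_pages[] and the pmap structures are marked as
 *		unneeded; returns the total number of pages so marked.
 */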
6137 uint32_t
6138 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6139 {
6140 unsigned int i;
6141 unsigned int compact_target_indx;
6142 vm_page_t mem, mem_next;
6143 vm_page_bucket_t *bucket;
6144 unsigned int mark_as_unneeded_pages = 0;
6145 unsigned int unneeded_vm_page_bucket_pages = 0;
6146 unsigned int unneeded_vm_pages_pages = 0;
6147 unsigned int unneeded_pmap_pages = 0;
6148 addr64_t start_of_unneeded = 0;
6149 addr64_t end_of_unneeded = 0;
6150
6151
6152 if (hibernate_should_abort())
6153 return (0);
6154
6155 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6156 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6157 vm_page_cleaned_count, compressor_object->resident_page_count);
6158
6159 for (i = 0; i < vm_page_bucket_count; i++) {
6160
6161 bucket = &vm_page_buckets[i];
6162
6163 for (mem = bucket->pages; mem != VM_PAGE_NULL; mem = mem_next) {
6164
6165 assert(mem->tabled);
6166
6167 mem_next = mem->next;
6168
6169 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6170 mem->next = hibernate_rebuild_hash_list;
6171 hibernate_rebuild_hash_list = mem;
6172 }
6173 }
6174 }
6175 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6176 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6177
6178 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6179
6180 compact_target_indx = 0;
6181
6182 for (i = 0; i < vm_pages_count; i++) {
6183
6184 mem = &vm_pages[i];
6185
6186 if (mem->free) {
6187 unsigned int color;
6188
6189 assert(mem->busy);
6190 assert(!mem->lopage);
6191
6192 color = mem->phys_page & vm_color_mask;
6193
6194 queue_remove(&vm_page_queue_free[color],
6195 mem,
6196 vm_page_t,
6197 pageq);
6198 mem->pageq.next = NULL;
6199 mem->pageq.prev = NULL;
6200
6201 vm_page_free_count--;
6202
6203 hibernate_teardown_found_free_pages++;
6204
6205 if ( !vm_pages[compact_target_indx].free)
6206 compact_target_indx = i;
6207 } else {
6208 /*
6209 * record this vm_page_t's original location;
6210 * we need this even if it doesn't get moved,
6211 * as an indicator to the rebuild function that
6212 * it doesn't have to be moved back
6213 */
6214 mem->next = mem;
6215
6216 if (vm_pages[compact_target_indx].free) {
6217 /*
6218 * we've got a hole to fill, so
6219 * move this vm_page_t to its new home
6220 */
6221 vm_pages[compact_target_indx] = *mem;
6222 mem->free = TRUE;
6223
6224 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6225 compact_target_indx++;
6226 } else
6227 hibernate_teardown_last_valid_compact_indx = i;
6228 }
6229 }
6230 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6231 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6232 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6233
6234 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6235
6236 if (start_of_unneeded) {
6237 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6238 mark_as_unneeded_pages += unneeded_pmap_pages;
6239 }
6240 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6241
6242 hibernate_rebuild_needed = TRUE;
6243
6244 return (mark_as_unneeded_pages);
6245 }
6246
6247
6248 #endif /* HIBERNATION */
6249
6250 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6251
6252 #include <mach_vm_debug.h>
6253 #if MACH_VM_DEBUG
6254
6255 #include <mach_debug/hash_info.h>
6256 #include <vm/vm_debug.h>
6257
6258 /*
6259 * Routine: vm_page_info
6260 * Purpose:
6261 * Return information about the global VP table.
6262 * Fills the buffer with as much information as possible
6263 * and returns the desired size of the buffer.
6264 * Conditions:
6265 * Nothing locked. The caller should provide
6266 * possibly-pageable memory.
6267 */
6268
6269 unsigned int
6270 vm_page_info(
6271 hash_info_bucket_t *info,
6272 unsigned int count)
6273 {
6274 unsigned int i;
6275 lck_spin_t *bucket_lock;
6276
6277 if (vm_page_bucket_count < count)
6278 count = vm_page_bucket_count;
6279
6280 for (i = 0; i < count; i++) {
6281 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6282 unsigned int bucket_count = 0;
6283 vm_page_t m;
6284
6285 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6286 lck_spin_lock(bucket_lock);
6287
6288 for (m = bucket->pages; m != VM_PAGE_NULL; m = m->next)
6289 bucket_count++;
6290
6291 lck_spin_unlock(bucket_lock);
6292
6293 /* don't touch pageable memory while holding locks */
6294 info[i].hib_count = bucket_count;
6295 }
6296
6297 return vm_page_bucket_count;
6298 }
6299 #endif /* MACH_VM_DEBUG */