[apple/xnu.git] / osfmk / vm / vm_resident.c (xnu-3248.60.10)
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67 #include <libkern/OSDebug.h>
68
69 #include <mach/clock_types.h>
70 #include <mach/vm_prot.h>
71 #include <mach/vm_statistics.h>
72 #include <mach/sdt.h>
73 #include <kern/counters.h>
74 #include <kern/sched_prim.h>
75 #include <kern/task.h>
76 #include <kern/thread.h>
77 #include <kern/kalloc.h>
78 #include <kern/zalloc.h>
79 #include <kern/xpr.h>
80 #include <kern/ledger.h>
81 #include <vm/pmap.h>
82 #include <vm/vm_init.h>
83 #include <vm/vm_map.h>
84 #include <vm/vm_page.h>
85 #include <vm/vm_pageout.h>
86 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
87 #include <kern/misc_protos.h>
88 #include <zone_debug.h>
89 #include <mach_debug/zone_info.h>
90 #include <vm/cpm.h>
91 #include <pexpert/pexpert.h>
92
93 #include <vm/vm_protos.h>
94 #include <vm/memory_object.h>
95 #include <vm/vm_purgeable_internal.h>
96 #include <vm/vm_compressor.h>
97
98 #if CONFIG_PHANTOM_CACHE
99 #include <vm/vm_phantom_cache.h>
100 #endif
101
102 #include <IOKit/IOHibernatePrivate.h>
103
104 #include <sys/kdebug.h>
105
106 boolean_t hibernate_cleaning_in_progress = FALSE;
107 boolean_t vm_page_free_verify = TRUE;
108
109 uint32_t vm_lopage_free_count = 0;
110 uint32_t vm_lopage_free_limit = 0;
111 uint32_t vm_lopage_lowater = 0;
112 boolean_t vm_lopage_refill = FALSE;
113 boolean_t vm_lopage_needed = FALSE;
114
115 lck_mtx_ext_t vm_page_queue_lock_ext;
116 lck_mtx_ext_t vm_page_queue_free_lock_ext;
117 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
118
119 int speculative_age_index = 0;
120 int speculative_steal_index = 0;
121 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
122
123
124 __private_extern__ void vm_page_init_lck_grp(void);
125
126 static void vm_page_free_prepare(vm_page_t page);
127 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
128
129 static void vm_tag_init(void);
130
131 uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
132
133 /*
134 * Associated with each page of user-allocatable memory is a
135 * page structure.
136 */
137
138 /*
139 * These variables record the values returned by vm_page_bootstrap,
140 * for debugging purposes. The implementation of pmap_steal_memory
141 * and pmap_startup here also uses them internally.
142 */
143
144 vm_offset_t virtual_space_start;
145 vm_offset_t virtual_space_end;
146 uint32_t vm_page_pages;
147
148 /*
149 * The vm_page_lookup() routine, which provides for fast
150 * (virtual memory object, offset) to page lookup, employs
151 * the following hash table. The vm_page_{insert,remove}
152 * routines install and remove associations in the table.
153 * [This table is often called the virtual-to-physical,
154 * or VP, table.]
155 */
156 typedef struct {
157 vm_page_packed_t page_list;
158 #if MACH_PAGE_HASH_STATS
159 int cur_count; /* current count */
160 int hi_count; /* high water mark */
161 #endif /* MACH_PAGE_HASH_STATS */
162 } vm_page_bucket_t;
163
164
165 #define BUCKETS_PER_LOCK 16
166
167 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
168 unsigned int vm_page_bucket_count = 0; /* How big is array? */
169 unsigned int vm_page_hash_mask; /* Mask for hash function */
170 unsigned int vm_page_hash_shift; /* Shift for hash function */
171 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
172 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
173
174 lck_spin_t *vm_page_bucket_locks;
175 lck_spin_t vm_objects_wired_lock;
176 lck_spin_t vm_allocation_sites_lock;
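/*
 * A descriptive sketch of how these pieces fit together (see the actual
 * code in vm_page_insert_internal/vm_page_remove/vm_page_lookup below):
 * each bucket heads a singly linked chain of pages threaded through the
 * packed next_m field of vm_page; an <object, offset> pair is hashed to a
 * bucket and that chain is walked while holding the corresponding entry
 * of vm_page_bucket_locks[], one spin lock covering BUCKETS_PER_LOCK
 * consecutive buckets.
 */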
177
178 #if VM_PAGE_BUCKETS_CHECK
179 boolean_t vm_page_buckets_check_ready = FALSE;
180 #if VM_PAGE_FAKE_BUCKETS
181 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
182 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
183 #endif /* VM_PAGE_FAKE_BUCKETS */
184 #endif /* VM_PAGE_BUCKETS_CHECK */
185
186 extern int not_in_kdp;
187
188
189 #if MACH_PAGE_HASH_STATS
190 /* This routine is only for debug. It is intended to be called by
191 * hand by a developer using a kernel debugger. This routine prints
192 * out vm_page_hash table statistics to the kernel debug console.
193 */
194 void
195 hash_debug(void)
196 {
197 int i;
198 int numbuckets = 0;
199 int highsum = 0;
200 int maxdepth = 0;
201
202 for (i = 0; i < vm_page_bucket_count; i++) {
203 if (vm_page_buckets[i].hi_count) {
204 numbuckets++;
205 highsum += vm_page_buckets[i].hi_count;
206 if (vm_page_buckets[i].hi_count > maxdepth)
207 maxdepth = vm_page_buckets[i].hi_count;
208 }
209 }
210 printf("Total number of buckets: %d\n", vm_page_bucket_count);
211 printf("Number used buckets: %d = %d%%\n",
212 numbuckets, 100*numbuckets/vm_page_bucket_count);
213 printf("Number unused buckets: %d = %d%%\n",
214 vm_page_bucket_count - numbuckets,
215 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
216 printf("Sum of bucket max depth: %d\n", highsum);
217 printf("Average bucket depth: %d.%2d\n",
218 highsum/vm_page_bucket_count,
219 highsum%vm_page_bucket_count);
220 printf("Maximum bucket depth: %d\n", maxdepth);
221 }
222 #endif /* MACH_PAGE_HASH_STATS */
223
224 /*
225 * The virtual page size is currently implemented as a runtime
226 * variable, but is constant once initialized using vm_set_page_size.
227 * This initialization must be done in the machine-dependent
228 * bootstrap sequence, before calling other machine-independent
229 * initializations.
230 *
231 * All references to the virtual page size outside this
232 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
233 * constants.
234 */
235 vm_size_t page_size = PAGE_SIZE;
236 vm_size_t page_mask = PAGE_MASK;
237 int page_shift = PAGE_SHIFT;
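/*
 * For illustration only -- a minimal sketch of the usual idioms built on
 * these constants (the rest of this file uses the kernel's own
 * round_page()/trunc_page()/atop_64() macros for this):
 *
 *	round up to a page boundary:	(size + page_mask) & ~page_mask
 *	truncate to a page boundary:	size & ~page_mask
 *	convert bytes to page frames:	size >> page_shift
 */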
238
239 /*
240 * Resident page structures are initialized from
241 * a template (see vm_page_alloc).
242 *
243 * When adding a new field to the virtual memory
244 * object structure, be sure to add initialization
245 * (see vm_page_bootstrap).
246 */
247 struct vm_page vm_page_template;
248
249 vm_page_t vm_pages = VM_PAGE_NULL;
250 unsigned int vm_pages_count = 0;
251 ppnum_t vm_page_lowest = 0;
252
253 /*
254 * Resident pages that represent real memory
255 * are allocated from a set of free lists,
256 * one per color.
257 */
258 unsigned int vm_colors;
259 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
260 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
261 unsigned int vm_free_magazine_refill_limit = 0;
262 queue_head_t vm_page_queue_free[MAX_COLORS];
263 unsigned int vm_page_free_wanted;
264 unsigned int vm_page_free_wanted_privileged;
265 unsigned int vm_page_free_count;
266 unsigned int vm_page_fictitious_count;
267
268 /*
269 * Occasionally, the virtual memory system uses
270 * resident page structures that do not refer to
271 * real pages, for example to leave a page with
272 * important state information in the VP table.
273 *
274 * These page structures are allocated the way
275 * most other kernel structures are.
276 */
277 zone_t vm_page_zone;
278 vm_locks_array_t vm_page_locks;
279 decl_lck_mtx_data(,vm_page_alloc_lock)
280 lck_mtx_ext_t vm_page_alloc_lock_ext;
281
282 unsigned int io_throttle_zero_fill;
283
284 unsigned int vm_page_local_q_count = 0;
285 unsigned int vm_page_local_q_soft_limit = 250;
286 unsigned int vm_page_local_q_hard_limit = 500;
287 struct vplq *vm_page_local_q = NULL;
288
289 /* N.B. Guard and fictitious pages must not
290 * be assigned a zero phys_page value.
291 */
292 /*
293 * Fictitious pages don't have a physical address,
294 * but we must initialize phys_page to something.
295 * For debugging, this should be a strange value
296 * that the pmap module can recognize in assertions.
297 */
298 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
299
300 /*
301 * Guard pages are not accessible so they don't
302 * need a physical address, but we need to enter
303 * one in the pmap.
304 * Let's make it recognizable and make sure that
305 * we don't use a real physical page with that
306 * physical address.
307 */
308 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
309
310 /*
311 * Resident page structures are also chained on
312 * queues that are used by the page replacement
313 * system (pageout daemon). These queues are
314 * defined here, but are shared by the pageout
315 * module. The inactive queue is broken into
316 * file backed and anonymous for convenience as the
317 * pageout daemon often assigns a higher
318 * importance to anonymous pages (it is less likely to pick them).
319 */
320 queue_head_t vm_page_queue_active;
321 queue_head_t vm_page_queue_inactive;
322 queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
323 queue_head_t vm_page_queue_throttled;
324
325 queue_head_t vm_objects_wired;
326
327 unsigned int vm_page_active_count;
328 unsigned int vm_page_inactive_count;
329 unsigned int vm_page_anonymous_count;
330 unsigned int vm_page_throttled_count;
331 unsigned int vm_page_speculative_count;
332
333 unsigned int vm_page_wire_count;
334 unsigned int vm_page_stolen_count;
335 unsigned int vm_page_wire_count_initial;
336 unsigned int vm_page_pages_initial;
337 unsigned int vm_page_gobble_count = 0;
338
339 #define VM_PAGE_WIRE_COUNT_WARNING 0
340 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
341
342 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
343 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
344 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
345
346 unsigned int vm_page_xpmapped_external_count = 0;
347 unsigned int vm_page_external_count = 0;
348 unsigned int vm_page_internal_count = 0;
349 unsigned int vm_page_pageable_external_count = 0;
350 unsigned int vm_page_pageable_internal_count = 0;
351
352 #if DEVELOPMENT || DEBUG
353 unsigned int vm_page_speculative_recreated = 0;
354 unsigned int vm_page_speculative_created = 0;
355 unsigned int vm_page_speculative_used = 0;
356 #endif
357
358 queue_head_t vm_page_queue_cleaned;
359
360 unsigned int vm_page_cleaned_count = 0;
361 unsigned int vm_pageout_enqueued_cleaned = 0;
362
363 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
364 ppnum_t max_valid_low_ppnum = 0xffffffff;
365
366
367 /*
368 * Several page replacement parameters are also
369 * shared with this module, so that page allocation
370 * (done here in vm_page_alloc) can trigger the
371 * pageout daemon.
372 */
373 unsigned int vm_page_free_target = 0;
374 unsigned int vm_page_free_min = 0;
375 unsigned int vm_page_throttle_limit = 0;
376 unsigned int vm_page_inactive_target = 0;
377 unsigned int vm_page_anonymous_min = 0;
378 unsigned int vm_page_inactive_min = 0;
379 unsigned int vm_page_free_reserved = 0;
380 unsigned int vm_page_throttle_count = 0;
381
382
383 /*
384 * The VM system has a couple of heuristics for deciding
385 * that pages are "uninteresting" and should be placed
386 * on the inactive queue as likely candidates for replacement.
387 * These variables let the heuristics be controlled at run-time
388 * to make experimentation easier.
389 */
390
391 boolean_t vm_page_deactivate_hint = TRUE;
392
393 struct vm_page_stats_reusable vm_page_stats_reusable;
394
395 /*
396 * vm_set_page_size:
397 *
398 * Sets the page size, perhaps based upon the memory
399 * size. Must be called before any use of page-size
400 * dependent functions.
401 *
402 * Sets page_shift and page_mask from page_size.
403 */
404 void
405 vm_set_page_size(void)
406 {
407 page_size = PAGE_SIZE;
408 page_mask = PAGE_MASK;
409 page_shift = PAGE_SHIFT;
410
411 if ((page_mask & page_size) != 0)
412 panic("vm_set_page_size: page size not a power of two");
413
414 for (page_shift = 0; ; page_shift++)
415 if ((1U << page_shift) == page_size)
416 break;
417 }
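/*
 * Worked example of the power-of-two check above, assuming
 * page_mask == page_size - 1 (which is how PAGE_MASK is derived from
 * PAGE_SIZE): for page_size 4096, page_mask is 4095 and
 * (4095 & 4096) == 0, so the check passes; for a bogus page_size of
 * 6144, page_mask would be 6143 and (6143 & 6144) == 4096 != 0, so we
 * panic.
 */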
418
419 #define COLOR_GROUPS_TO_STEAL 4
420
421
422 /* Called once during startup, once the cache geometry is known.
423 */
424 static void
425 vm_page_set_colors( void )
426 {
427 unsigned int n, override;
428
429 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
430 n = override;
431 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
432 n = vm_cache_geometry_colors;
433 else n = DEFAULT_COLORS; /* use default if all else fails */
434
435 if ( n == 0 )
436 n = 1;
437 if ( n > MAX_COLORS )
438 n = MAX_COLORS;
439
440 /* the count must be a power of 2 */
441 if ( ( n & (n - 1)) != 0 )
442 panic("vm_page_set_colors");
443
444 vm_colors = n;
445 vm_color_mask = n - 1;
446
447 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
448 }
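/*
 * Sketch only (not used by the code above, and purely illustrative): once
 * vm_color_mask is set, the natural way to map a physical page to one of
 * the per-color free queues declared earlier (vm_page_queue_free[]) is to
 * take the low-order bits of its physical page number, so that physically
 * consecutive pages are spread across the colors.
 */
static inline unsigned int
vm_page_color_sketch(ppnum_t phys_page)
{
	return ((unsigned int)phys_page & vm_color_mask);
}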
449
450
451 lck_grp_t vm_page_lck_grp_free;
452 lck_grp_t vm_page_lck_grp_queue;
453 lck_grp_t vm_page_lck_grp_local;
454 lck_grp_t vm_page_lck_grp_purge;
455 lck_grp_t vm_page_lck_grp_alloc;
456 lck_grp_t vm_page_lck_grp_bucket;
457 lck_grp_attr_t vm_page_lck_grp_attr;
458 lck_attr_t vm_page_lck_attr;
459
460
461 __private_extern__ void
462 vm_page_init_lck_grp(void)
463 {
464 /*
465 * initialize the vm_page lock world
466 */
467 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
468 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
469 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
470 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
471 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
472 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
473 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
474 lck_attr_setdefault(&vm_page_lck_attr);
475 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
476
477 vm_compressor_init_locks();
478 }
479
480 void
481 vm_page_init_local_q()
482 {
483 unsigned int num_cpus;
484 unsigned int i;
485 struct vplq *t_local_q;
486
487 num_cpus = ml_get_max_cpus();
488
489 /*
490 * no point in this for a uni-processor system
491 */
492 if (num_cpus >= 2) {
493 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
494
495 for (i = 0; i < num_cpus; i++) {
496 struct vpl *lq;
497
498 lq = &t_local_q[i].vpl_un.vpl;
499 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
500 queue_init(&lq->vpl_queue);
501 lq->vpl_count = 0;
502 lq->vpl_internal_count = 0;
503 lq->vpl_external_count = 0;
504 }
505 vm_page_local_q_count = num_cpus;
506
507 vm_page_local_q = (struct vplq *)t_local_q;
508 }
509 }
510
511
512 /*
513 * vm_page_bootstrap:
514 *
515 * Initializes the resident memory module.
516 *
517 * Allocates memory for the page cells, and
518 * for the object/offset-to-page hash table headers.
519 * Each page cell is initialized and placed on the free list.
520 * Returns the range of available kernel virtual memory.
521 */
522
523 void
524 vm_page_bootstrap(
525 vm_offset_t *startp,
526 vm_offset_t *endp)
527 {
528 register vm_page_t m;
529 unsigned int i;
530 unsigned int log1;
531 unsigned int log2;
532 unsigned int size;
533
534 /*
535 * Initialize the vm_page template.
536 */
537
538 m = &vm_page_template;
539 bzero(m, sizeof (*m));
540
541 m->pageq.next = NULL;
542 m->pageq.prev = NULL;
543 m->listq.next = NULL;
544 m->listq.prev = NULL;
545 m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
546
547 m->object = VM_OBJECT_NULL; /* reset later */
548 m->offset = (vm_object_offset_t) -1; /* reset later */
549
550 m->wire_count = 0;
551 m->local = FALSE;
552 m->inactive = FALSE;
553 m->active = FALSE;
554 m->pageout_queue = FALSE;
555 m->speculative = FALSE;
556 m->laundry = FALSE;
557 m->free = FALSE;
558 m->reference = FALSE;
559 m->gobbled = FALSE;
560 m->private = FALSE;
561 m->throttled = FALSE;
562 m->__unused_pageq_bits = 0;
563
564 m->phys_page = 0; /* reset later */
565
566 m->busy = TRUE;
567 m->wanted = FALSE;
568 m->tabled = FALSE;
569 m->hashed = FALSE;
570 m->fictitious = FALSE;
571 m->pmapped = FALSE;
572 m->wpmapped = FALSE;
573 m->pageout = FALSE;
574 m->absent = FALSE;
575 m->error = FALSE;
576 m->dirty = FALSE;
577 m->cleaning = FALSE;
578 m->precious = FALSE;
579 m->clustered = FALSE;
580 m->overwriting = FALSE;
581 m->restart = FALSE;
582 m->unusual = FALSE;
583 m->encrypted = FALSE;
584 m->encrypted_cleaning = FALSE;
585 m->cs_validated = FALSE;
586 m->cs_tainted = FALSE;
587 m->cs_nx = FALSE;
588 m->no_cache = FALSE;
589 m->reusable = FALSE;
590 m->slid = FALSE;
591 m->xpmapped = FALSE;
592 m->compressor = FALSE;
593 m->written_by_kernel = FALSE;
594 m->__unused_object_bits = 0;
595
596 /*
597 * Initialize the page queues.
598 */
599 vm_page_init_lck_grp();
600
601 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
602 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
603 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
604
605 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
606 int group;
607
608 purgeable_queues[i].token_q_head = 0;
609 purgeable_queues[i].token_q_tail = 0;
610 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
611 queue_init(&purgeable_queues[i].objq[group]);
612
613 purgeable_queues[i].type = i;
614 purgeable_queues[i].new_pages = 0;
615 #if MACH_ASSERT
616 purgeable_queues[i].debug_count_tokens = 0;
617 purgeable_queues[i].debug_count_objects = 0;
618 #endif
619 };
620 purgeable_nonvolatile_count = 0;
621 queue_init(&purgeable_nonvolatile_queue);
622
623 for (i = 0; i < MAX_COLORS; i++ )
624 queue_init(&vm_page_queue_free[i]);
625
626 queue_init(&vm_lopage_queue_free);
627 queue_init(&vm_page_queue_active);
628 queue_init(&vm_page_queue_inactive);
629 queue_init(&vm_page_queue_cleaned);
630 queue_init(&vm_page_queue_throttled);
631 queue_init(&vm_page_queue_anonymous);
632 queue_init(&vm_objects_wired);
633
634 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
635 queue_init(&vm_page_queue_speculative[i].age_q);
636
637 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
638 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
639 }
640 vm_page_free_wanted = 0;
641 vm_page_free_wanted_privileged = 0;
642
643 vm_page_set_colors();
644
645
646 /*
647 * Steal memory for the map and zone subsystems.
648 */
649 kernel_debug_string_simple("zone_steal_memory");
650 zone_steal_memory();
651 kernel_debug_string_simple("vm_map_steal_memory");
652 vm_map_steal_memory();
653
654 /*
655 * Allocate (and initialize) the virtual-to-physical
656 * table hash buckets.
657 *
658 * The number of buckets should be a power of two to
659 * get a good hash function. The following computation
660 * chooses the first power of two that is greater
661 * than the number of physical pages in the system.
662 */
663
664 if (vm_page_bucket_count == 0) {
665 unsigned int npages = pmap_free_pages();
666
667 vm_page_bucket_count = 1;
668 while (vm_page_bucket_count < npages)
669 vm_page_bucket_count <<= 1;
670 }
671 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
672
673 vm_page_hash_mask = vm_page_bucket_count - 1;
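	/*
	 * Worked example: on a machine with roughly one million physical
	 * pages (about 4GB of 4K pages), the loop above stops at
	 * vm_page_bucket_count = 2^20 = 1048576, giving
	 * vm_page_bucket_lock_count = 1048576 / 16 = 65536 and
	 * vm_page_hash_mask = 0xFFFFF.
	 */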
674
675 /*
676 * Calculate object shift value for hashing algorithm:
677 * O = log2(sizeof(struct vm_object))
678 * B = log2(vm_page_bucket_count)
679 * hash shifts the object left by
680 * B/2 - O
681 */
682 size = vm_page_bucket_count;
683 for (log1 = 0; size > 1; log1++)
684 size /= 2;
685 size = sizeof(struct vm_object);
686 for (log2 = 0; size > 1; log2++)
687 size /= 2;
688 vm_page_hash_shift = log1/2 - log2 + 1;
689
690 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
691 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
692 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure unique series */
693
694 if (vm_page_hash_mask & vm_page_bucket_count)
695 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
696
697 #if VM_PAGE_BUCKETS_CHECK
698 #if VM_PAGE_FAKE_BUCKETS
699 /*
700 * Allocate a decoy set of page buckets, to detect
701 * any stomping there.
702 */
703 vm_page_fake_buckets = (vm_page_bucket_t *)
704 pmap_steal_memory(vm_page_bucket_count *
705 sizeof(vm_page_bucket_t));
706 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
707 vm_page_fake_buckets_end =
708 vm_map_round_page((vm_page_fake_buckets_start +
709 (vm_page_bucket_count *
710 sizeof (vm_page_bucket_t))),
711 PAGE_MASK);
712 char *cp;
713 for (cp = (char *)vm_page_fake_buckets_start;
714 cp < (char *)vm_page_fake_buckets_end;
715 cp++) {
716 *cp = 0x5a;
717 }
718 #endif /* VM_PAGE_FAKE_BUCKETS */
719 #endif /* VM_PAGE_BUCKETS_CHECK */
720
721 kernel_debug_string_simple("vm_page_buckets");
722 vm_page_buckets = (vm_page_bucket_t *)
723 pmap_steal_memory(vm_page_bucket_count *
724 sizeof(vm_page_bucket_t));
725
726 kernel_debug_string_simple("vm_page_bucket_locks");
727 vm_page_bucket_locks = (lck_spin_t *)
728 pmap_steal_memory(vm_page_bucket_lock_count *
729 sizeof(lck_spin_t));
730
731 for (i = 0; i < vm_page_bucket_count; i++) {
732 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
733
734 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
735 #if MACH_PAGE_HASH_STATS
736 bucket->cur_count = 0;
737 bucket->hi_count = 0;
738 #endif /* MACH_PAGE_HASH_STATS */
739 }
740
741 for (i = 0; i < vm_page_bucket_lock_count; i++)
742 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
743
744 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
745 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
746 vm_tag_init();
747
748 #if VM_PAGE_BUCKETS_CHECK
749 vm_page_buckets_check_ready = TRUE;
750 #endif /* VM_PAGE_BUCKETS_CHECK */
751
752 /*
753 * Machine-dependent code allocates the resident page table.
754 * It uses vm_page_init to initialize the page frames.
755 * The code also returns to us the virtual space available
756 * to the kernel. We don't trust the pmap module
757 * to get the alignment right.
758 */
759
760 kernel_debug_string_simple("pmap_startup");
761 pmap_startup(&virtual_space_start, &virtual_space_end);
762 virtual_space_start = round_page(virtual_space_start);
763 virtual_space_end = trunc_page(virtual_space_end);
764
765 *startp = virtual_space_start;
766 *endp = virtual_space_end;
767
768 /*
769 * Compute the initial "wire" count.
770 * Up until now, the pages which have been set aside are not under
771 * the VM system's control, so although they aren't explicitly
772 * wired, they nonetheless can't be moved. At this moment,
773 * all VM managed pages are "free", courtesy of pmap_startup.
774 */
775 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
776 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
777 vm_page_wire_count_initial = vm_page_wire_count;
778 vm_page_pages_initial = vm_page_pages;
779
780 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
781 vm_page_free_count, vm_page_wire_count);
782
783 kernel_debug_string_simple("vm_page_bootstrap complete");
784 simple_lock_init(&vm_paging_lock, 0);
785 }
786
787 #ifndef MACHINE_PAGES
788 /*
789 * We implement pmap_steal_memory and pmap_startup with the help
790 * of two simpler functions, pmap_virtual_space and pmap_next_page.
791 */
792
793 void *
794 pmap_steal_memory(
795 vm_size_t size)
796 {
797 vm_offset_t addr, vaddr;
798 ppnum_t phys_page;
799
800 /*
801 * We round the size up to a multiple of sizeof (void *).
802 */
803
804 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
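	/*
	 * Worked example: on LP64, sizeof (void *) is 8, so a request for
	 * 13 bytes becomes (13 + 7) & ~7 = 16 bytes.
	 */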
805
806 /*
807 * If this is the first call to pmap_steal_memory,
808 * we have to initialize ourself.
809 */
810
811 if (virtual_space_start == virtual_space_end) {
812 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
813
814 /*
815 * The initial values must be aligned properly, and
816 * we don't trust the pmap module to do it right.
817 */
818
819 virtual_space_start = round_page(virtual_space_start);
820 virtual_space_end = trunc_page(virtual_space_end);
821 }
822
823 /*
824 * Allocate virtual memory for this request.
825 */
826
827 addr = virtual_space_start;
828 virtual_space_start += size;
829
830 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
831
832 /*
833 * Allocate and map physical pages to back new virtual pages.
834 */
835
836 for (vaddr = round_page(addr);
837 vaddr < addr + size;
838 vaddr += PAGE_SIZE) {
839
840 if (!pmap_next_page_hi(&phys_page))
841 panic("pmap_steal_memory");
842
843 /*
844 * XXX Logically, these mappings should be wired,
845 * but some pmap modules barf if they are.
846 */
847 #if defined(__LP64__)
848 pmap_pre_expand(kernel_pmap, vaddr);
849 #endif
850
851 pmap_enter(kernel_pmap, vaddr, phys_page,
852 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
853 VM_WIMG_USE_DEFAULT, FALSE);
854 /*
855 * Account for newly stolen memory
856 */
857 vm_page_wire_count++;
858 vm_page_stolen_count++;
859 }
860
861 return (void *) addr;
862 }
863
864 void vm_page_release_startup(vm_page_t mem);
865 void
866 pmap_startup(
867 vm_offset_t *startp,
868 vm_offset_t *endp)
869 {
870 unsigned int i, npages, pages_initialized, fill, fillval;
871 ppnum_t phys_page;
872 addr64_t tmpaddr;
873
874
875 #if defined(__LP64__)
876 /*
877 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
878 */
879 assert(sizeof(struct vm_page) == 64);
880
881 /*
882 * make sure we are aligned on a 64 byte boundary
883 * for VM_PAGE_PACK_PTR (it clips off the low-order
884 * 6 bits of the pointer)
885 */
886 if (virtual_space_start != virtual_space_end)
887 virtual_space_start = round_page(virtual_space_start);
888 #endif
889
890 /*
891 * We calculate how many page frames we will have
892 * and then allocate the page structures in one chunk.
893 */
894
895 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
896 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
897 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many page frames fit when each also needs room for its vm_page_t */
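	/*
	 * Worked example: with 4K pages and the 64-byte struct vm_page
	 * asserted above under __LP64__, each managed page frame costs
	 * 4096 + 64 = 4160 bytes of the remaining memory, so 4GB of
	 * remaining memory yields about 4294967296 / 4160 ~= 1032444
	 * page frames.
	 */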
898
899 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
900
901 /*
902 * Initialize the page frames.
903 */
904 kernel_debug_string_simple("Initialize the page frames");
905 for (i = 0, pages_initialized = 0; i < npages; i++) {
906 if (!pmap_next_page(&phys_page))
907 break;
908 if (pages_initialized == 0 || phys_page < vm_page_lowest)
909 vm_page_lowest = phys_page;
910
911 vm_page_init(&vm_pages[i], phys_page, FALSE);
912 vm_page_pages++;
913 pages_initialized++;
914 }
915 vm_pages_count = pages_initialized;
916
917 #if defined(__LP64__)
918
919 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
920 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
921
922 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
923 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
924 #endif
925 kernel_debug_string_simple("page fill/release");
926 /*
927 * Check if we want to initialize pages to a known value
928 */
929 fill = 0; /* Assume no fill */
930 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
931 #if DEBUG
932 /* This slows down booting the DEBUG kernel, particularly on
933 * large memory systems, but is worthwhile in deterministically
934 * trapping uninitialized memory usage.
935 */
936 if (fill == 0) {
937 fill = 1;
938 fillval = 0xDEB8F177;
939 }
940 #endif
941 if (fill)
942 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
943 // -debug code remove
944 if (2 == vm_himemory_mode) {
945 // free low -> high so high is preferred
946 for (i = 1; i <= pages_initialized; i++) {
947 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
948 vm_page_release_startup(&vm_pages[i - 1]);
949 }
950 }
951 else
952 // debug code remove-
953
954 /*
955 * Release pages in reverse order so that physical pages
956 * initially get allocated in ascending addresses. This keeps
957 * the devices (which must address physical memory) happy if
958 * they require several consecutive pages.
959 */
960 for (i = pages_initialized; i > 0; i--) {
961 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
962 vm_page_release_startup(&vm_pages[i - 1]);
963 }
964
965 VM_CHECK_MEMORYSTATUS;
966
967 #if 0
968 {
969 vm_page_t xx, xxo, xxl;
970 int i, j, k, l;
971
972 j = 0; /* (BRINGUP) */
973 xxl = 0;
974
975 for( i = 0; i < vm_colors; i++ ) {
976 queue_iterate(&vm_page_queue_free[i],
977 xx,
978 vm_page_t,
979 pageq) { /* BRINGUP */
980 j++; /* (BRINGUP) */
981 if(j > vm_page_free_count) { /* (BRINGUP) */
982 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
983 }
984
985 l = vm_page_free_count - j; /* (BRINGUP) */
986 k = 0; /* (BRINGUP) */
987
988 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
989
990 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
991 k++;
992 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
993 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
994 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
995 }
996 }
997
998 xxl = xx;
999 }
1000 }
1001
1002 if(j != vm_page_free_count) { /* (BRINGUP) */
1003 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
1004 }
1005 }
1006 #endif
1007
1008
1009 /*
1010 * We have to re-align virtual_space_start,
1011 * because pmap_steal_memory has been using it.
1012 */
1013
1014 virtual_space_start = round_page(virtual_space_start);
1015
1016 *startp = virtual_space_start;
1017 *endp = virtual_space_end;
1018 }
1019 #endif /* MACHINE_PAGES */
1020
1021 /*
1022 * Routine: vm_page_module_init
1023 * Purpose:
1024 * Second initialization pass, to be done after
1025 * the basic VM system is ready.
1026 */
1027 void
1028 vm_page_module_init(void)
1029 {
1030 uint64_t vm_page_zone_pages, vm_page_zone_data_size;
1031 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
1032 0, PAGE_SIZE, "vm pages");
1033
1034 #if ZONE_DEBUG
1035 zone_debug_disable(vm_page_zone);
1036 #endif /* ZONE_DEBUG */
1037
1038 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1039 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1040 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1041 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1042 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1043 /*
1044 * Adjust zone statistics to account for the real pages allocated
1045 * in vm_page_create(). [Q: is this really what we want?]
1046 */
1047 vm_page_zone->count += vm_page_pages;
1048 vm_page_zone->sum_count += vm_page_pages;
1049 vm_page_zone_data_size = vm_page_pages * vm_page_zone->elem_size;
1050 vm_page_zone->cur_size += vm_page_zone_data_size;
1051 vm_page_zone_pages = ((round_page(vm_page_zone_data_size)) / PAGE_SIZE);
1052 OSAddAtomic64(vm_page_zone_pages, &(vm_page_zone->page_count));
1053 /* since zone accounts for these, take them out of stolen */
1054 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1055 }
1056
1057 /*
1058 * Routine: vm_page_create
1059 * Purpose:
1060 * After the VM system is up, machine-dependent code
1061 * may stumble across more physical memory. For example,
1062 * memory that it was reserving for a frame buffer.
1063 * vm_page_create turns this memory into available pages.
1064 */
1065
1066 void
1067 vm_page_create(
1068 ppnum_t start,
1069 ppnum_t end)
1070 {
1071 ppnum_t phys_page;
1072 vm_page_t m;
1073
1074 for (phys_page = start;
1075 phys_page < end;
1076 phys_page++) {
1077 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1078 == VM_PAGE_NULL)
1079 vm_page_more_fictitious();
1080
1081 m->fictitious = FALSE;
1082 pmap_clear_noencrypt(phys_page);
1083
1084 vm_page_pages++;
1085 vm_page_release(m);
1086 }
1087 }
1088
1089 /*
1090 * vm_page_hash:
1091 *
1092 * Distributes the object/offset key pair among hash buckets.
1093 *
1094 * NOTE: The bucket count must be a power of 2
1095 */
1096 #define vm_page_hash(object, offset) (\
1097 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1098 & vm_page_hash_mask)
1099
1100
1101 /*
1102 * vm_page_insert: [ internal use only ]
1103 *
1104 * Inserts the given mem entry into the object/object-page
1105 * table and object list.
1106 *
1107 * The object must be locked.
1108 */
1109 void
1110 vm_page_insert(
1111 vm_page_t mem,
1112 vm_object_t object,
1113 vm_object_offset_t offset)
1114 {
1115 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1116 }
1117
1118 void
1119 vm_page_insert_wired(
1120 vm_page_t mem,
1121 vm_object_t object,
1122 vm_object_offset_t offset,
1123 vm_tag_t tag)
1124 {
1125 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
1126 }
1127
1128 void
1129 vm_page_insert_internal(
1130 vm_page_t mem,
1131 vm_object_t object,
1132 vm_object_offset_t offset,
1133 vm_tag_t tag,
1134 boolean_t queues_lock_held,
1135 boolean_t insert_in_hash,
1136 boolean_t batch_pmap_op,
1137 boolean_t batch_accounting,
1138 uint64_t *delayed_ledger_update)
1139 {
1140 vm_page_bucket_t *bucket;
1141 lck_spin_t *bucket_lock;
1142 int hash_id;
1143 task_t owner;
1144
1145 XPR(XPR_VM_PAGE,
1146 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1147 object, offset, mem, 0,0);
1148 #if 0
1149 /*
1150 * we may not hold the page queue lock
1151 * so this check isn't safe to make
1152 */
1153 VM_PAGE_CHECK(mem);
1154 #endif
1155
1156 assert(page_aligned(offset));
1157
1158 assert(!VM_PAGE_WIRED(mem) || mem->private || mem->fictitious || (tag != VM_KERN_MEMORY_NONE));
1159
1160 /* the vm_submap_object is only a placeholder for submaps */
1161 assert(object != vm_submap_object);
1162
1163 vm_object_lock_assert_exclusive(object);
1164 #if DEBUG
1165 lck_mtx_assert(&vm_page_queue_lock,
1166 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1167 : LCK_MTX_ASSERT_NOTOWNED);
1168 #endif /* DEBUG */
1169
1170 if (insert_in_hash == TRUE) {
1171 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1172 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1173 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1174 "already in (obj=%p,off=0x%llx)",
1175 mem, object, offset, mem->object, mem->offset);
1176 #endif
1177 assert(!object->internal || offset < object->vo_size);
1178
1179 /* only insert "pageout" pages into "pageout" objects,
1180 * and normal pages into normal objects */
1181 #if 00
1182 /*
1183 * For some reason, this assertion gets tripped
1184 * but it's mostly harmless, so let's disable it
1185 * for now.
1186 */
1187 assert(object->pageout == mem->pageout);
1188 #endif /* 00 */
1189
1190 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1191
1192 /*
1193 * Record the object/offset pair in this page
1194 */
1195
1196 mem->object = object;
1197 mem->offset = offset;
1198
1199 /*
1200 * Insert it into the object/offset hash table
1201 */
1202 hash_id = vm_page_hash(object, offset);
1203 bucket = &vm_page_buckets[hash_id];
1204 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1205
1206 lck_spin_lock(bucket_lock);
1207
1208 mem->next_m = bucket->page_list;
1209 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1210 assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));
1211
1212 #if MACH_PAGE_HASH_STATS
1213 if (++bucket->cur_count > bucket->hi_count)
1214 bucket->hi_count = bucket->cur_count;
1215 #endif /* MACH_PAGE_HASH_STATS */
1216 mem->hashed = TRUE;
1217 lck_spin_unlock(bucket_lock);
1218 }
1219
1220 {
1221 unsigned int cache_attr;
1222
1223 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1224
1225 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1226 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1227 }
1228 }
1229 /*
1230 * Now link into the object's list of backed pages.
1231 */
1232 queue_enter(&object->memq, mem, vm_page_t, listq);
1233 object->memq_hint = mem;
1234 mem->tabled = TRUE;
1235
1236 /*
1237 * Show that the object has one more resident page.
1238 */
1239
1240 object->resident_page_count++;
1241 if (VM_PAGE_WIRED(mem)) {
1242 if (!mem->private && !mem->fictitious)
1243 {
1244 if (!object->wired_page_count)
1245 {
1246 assert(VM_KERN_MEMORY_NONE != tag);
1247 object->wire_tag = tag;
1248 VM_OBJECT_WIRED(object);
1249 }
1250 }
1251 object->wired_page_count++;
1252 }
1253 assert(object->resident_page_count >= object->wired_page_count);
1254
1255 if (batch_accounting == FALSE) {
1256 if (object->internal) {
1257 OSAddAtomic(1, &vm_page_internal_count);
1258 } else {
1259 OSAddAtomic(1, &vm_page_external_count);
1260 }
1261 }
1262
1263 /*
1264 * It wouldn't make sense to insert a "reusable" page in
1265 * an object (the page would have been marked "reusable" only
1266 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1267 * in the object at that time).
1268 * But a page could be inserted in an "all_reusable" object, if
1269 * something faults it in (a vm_read() from another task or a
1270 * "use-after-free" issue in user space, for example). It can
1271 * also happen if we're relocating a page from that object to
1272 * a different physical page during a physically-contiguous
1273 * allocation.
1274 */
1275 assert(!mem->reusable);
1276 if (mem->object->all_reusable) {
1277 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1278 }
1279
1280 if (object->purgable == VM_PURGABLE_DENY) {
1281 owner = TASK_NULL;
1282 } else {
1283 owner = object->vo_purgeable_owner;
1284 }
1285 if (owner &&
1286 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1287 VM_PAGE_WIRED(mem))) {
1288
1289 if (delayed_ledger_update)
1290 *delayed_ledger_update += PAGE_SIZE;
1291 else {
1292 /* more non-volatile bytes */
1293 ledger_credit(owner->ledger,
1294 task_ledgers.purgeable_nonvolatile,
1295 PAGE_SIZE);
1296 /* more footprint */
1297 ledger_credit(owner->ledger,
1298 task_ledgers.phys_footprint,
1299 PAGE_SIZE);
1300 }
1301
1302 } else if (owner &&
1303 (object->purgable == VM_PURGABLE_VOLATILE ||
1304 object->purgable == VM_PURGABLE_EMPTY)) {
1305 assert(! VM_PAGE_WIRED(mem));
1306 /* more volatile bytes */
1307 ledger_credit(owner->ledger,
1308 task_ledgers.purgeable_volatile,
1309 PAGE_SIZE);
1310 }
1311
1312 if (object->purgable == VM_PURGABLE_VOLATILE) {
1313 if (VM_PAGE_WIRED(mem)) {
1314 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1315 } else {
1316 OSAddAtomic(+1, &vm_page_purgeable_count);
1317 }
1318 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1319 mem->throttled) {
1320 /*
1321 * This page belongs to a purged VM object but hasn't
1322 * been purged (because it was "busy").
1323 * It's in the "throttled" queue and hence not
1324 * visible to vm_pageout_scan(). Move it to a pageable
1325 * queue, so that it can eventually be reclaimed, instead
1326 * of lingering in the "empty" object.
1327 */
1328 if (queues_lock_held == FALSE)
1329 vm_page_lockspin_queues();
1330 vm_page_deactivate(mem);
1331 if (queues_lock_held == FALSE)
1332 vm_page_unlock_queues();
1333 }
1334
1335 #if VM_OBJECT_TRACKING_OP_MODIFIED
1336 if (vm_object_tracking_inited &&
1337 object->internal &&
1338 object->resident_page_count == 0 &&
1339 object->pager == NULL &&
1340 object->shadow != NULL &&
1341 object->shadow->copy == object) {
1342 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1343 int numsaved = 0;
1344
1345 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1346 btlog_add_entry(vm_object_tracking_btlog,
1347 object,
1348 VM_OBJECT_TRACKING_OP_MODIFIED,
1349 bt,
1350 numsaved);
1351 }
1352 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1353 }
1354
1355 /*
1356 * vm_page_replace:
1357 *
1358 * Exactly like vm_page_insert, except that we first
1359 * remove any existing page at the given offset in object.
1360 *
1361 * The object must be locked.
1362 */
1363 void
1364 vm_page_replace(
1365 register vm_page_t mem,
1366 register vm_object_t object,
1367 register vm_object_offset_t offset)
1368 {
1369 vm_page_bucket_t *bucket;
1370 vm_page_t found_m = VM_PAGE_NULL;
1371 lck_spin_t *bucket_lock;
1372 int hash_id;
1373
1374 #if 0
1375 /*
1376 * we don't hold the page queue lock
1377 * so this check isn't safe to make
1378 */
1379 VM_PAGE_CHECK(mem);
1380 #endif
1381 vm_object_lock_assert_exclusive(object);
1382 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1383 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1384 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1385 "already in (obj=%p,off=0x%llx)",
1386 mem, object, offset, mem->object, mem->offset);
1387 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1388 #endif
1389 /*
1390 * Record the object/offset pair in this page
1391 */
1392
1393 mem->object = object;
1394 mem->offset = offset;
1395
1396 /*
1397 * Insert it into the object/offset hash table,
1398 * replacing any page that might have been there.
1399 */
1400
1401 hash_id = vm_page_hash(object, offset);
1402 bucket = &vm_page_buckets[hash_id];
1403 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1404
1405 lck_spin_lock(bucket_lock);
1406
1407 if (bucket->page_list) {
1408 vm_page_packed_t *mp = &bucket->page_list;
1409 vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);
1410
1411 do {
1412 if (m->object == object && m->offset == offset) {
1413 /*
1414 * Remove old page from hash list
1415 */
1416 *mp = m->next_m;
1417 m->hashed = FALSE;
1418
1419 found_m = m;
1420 break;
1421 }
1422 mp = &m->next_m;
1423 } while ((m = VM_PAGE_UNPACK_PTR(*mp)));
1424
1425 mem->next_m = bucket->page_list;
1426 } else {
1427 mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1428 }
1429 /*
1430 * insert new page at head of hash list
1431 */
1432 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1433 mem->hashed = TRUE;
1434
1435 lck_spin_unlock(bucket_lock);
1436
1437 if (found_m) {
1438 /*
1439 * there was already a page at the specified
1440 * offset for this object... remove it from
1441 * the object and free it back to the free list
1442 */
1443 vm_page_free_unlocked(found_m, FALSE);
1444 }
1445 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
1446 }
1447
1448 /*
1449 * vm_page_remove: [ internal use only ]
1450 *
1451 * Removes the given mem entry from the object/offset-page
1452 * table and the object page list.
1453 *
1454 * The object must be locked.
1455 */
1456
1457 void
1458 vm_page_remove(
1459 vm_page_t mem,
1460 boolean_t remove_from_hash)
1461 {
1462 vm_page_bucket_t *bucket;
1463 vm_page_t this;
1464 lck_spin_t *bucket_lock;
1465 int hash_id;
1466 task_t owner;
1467
1468 XPR(XPR_VM_PAGE,
1469 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1470 mem->object, mem->offset,
1471 mem, 0,0);
1472
1473 vm_object_lock_assert_exclusive(mem->object);
1474 assert(mem->tabled);
1475 assert(!mem->cleaning);
1476 assert(!mem->laundry);
1477 #if 0
1478 /*
1479 * we don't hold the page queue lock
1480 * so this check isn't safe to make
1481 */
1482 VM_PAGE_CHECK(mem);
1483 #endif
1484 if (remove_from_hash == TRUE) {
1485 /*
1486 * Remove from the object/offset hash table
1487 */
1488 hash_id = vm_page_hash(mem->object, mem->offset);
1489 bucket = &vm_page_buckets[hash_id];
1490 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1491
1492 lck_spin_lock(bucket_lock);
1493
1494 if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
1495 /* optimize for common case */
1496
1497 bucket->page_list = mem->next_m;
1498 } else {
1499 vm_page_packed_t *prev;
1500
1501 for (prev = &this->next_m;
1502 (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
1503 prev = &this->next_m)
1504 continue;
1505 *prev = this->next_m;
1506 }
1507 #if MACH_PAGE_HASH_STATS
1508 bucket->cur_count--;
1509 #endif /* MACH_PAGE_HASH_STATS */
1510 mem->hashed = FALSE;
1511 lck_spin_unlock(bucket_lock);
1512 }
1513 /*
1514 * Now remove from the object's list of backed pages.
1515 */
1516
1517 vm_page_remove_internal(mem);
1518
1519 /*
1520 * And show that the object has one fewer resident
1521 * page.
1522 */
1523
1524 assert(mem->object->resident_page_count > 0);
1525 mem->object->resident_page_count--;
1526
1527 if (mem->object->internal) {
1528 #if DEBUG
1529 assert(vm_page_internal_count);
1530 #endif /* DEBUG */
1531
1532 OSAddAtomic(-1, &vm_page_internal_count);
1533 } else {
1534 assert(vm_page_external_count);
1535 OSAddAtomic(-1, &vm_page_external_count);
1536
1537 if (mem->xpmapped) {
1538 assert(vm_page_xpmapped_external_count);
1539 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1540 }
1541 }
1542 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1543 if (mem->object->resident_page_count == 0)
1544 vm_object_cache_remove(mem->object);
1545 }
1546
1547 if (VM_PAGE_WIRED(mem)) {
1548 assert(mem->object->wired_page_count > 0);
1549 mem->object->wired_page_count--;
1550 if (!mem->object->wired_page_count) {
1551 VM_OBJECT_UNWIRED(mem->object);
1552 }
1553 }
1554 assert(mem->object->resident_page_count >=
1555 mem->object->wired_page_count);
1556 if (mem->reusable) {
1557 assert(mem->object->reusable_page_count > 0);
1558 mem->object->reusable_page_count--;
1559 assert(mem->object->reusable_page_count <=
1560 mem->object->resident_page_count);
1561 mem->reusable = FALSE;
1562 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1563 vm_page_stats_reusable.reused_remove++;
1564 } else if (mem->object->all_reusable) {
1565 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1566 vm_page_stats_reusable.reused_remove++;
1567 }
1568
1569 if (mem->object->purgable == VM_PURGABLE_DENY) {
1570 owner = TASK_NULL;
1571 } else {
1572 owner = mem->object->vo_purgeable_owner;
1573 }
1574 if (owner &&
1575 (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
1576 VM_PAGE_WIRED(mem))) {
1577 /* less non-volatile bytes */
1578 ledger_debit(owner->ledger,
1579 task_ledgers.purgeable_nonvolatile,
1580 PAGE_SIZE);
1581 /* less footprint */
1582 ledger_debit(owner->ledger,
1583 task_ledgers.phys_footprint,
1584 PAGE_SIZE);
1585 } else if (owner &&
1586 (mem->object->purgable == VM_PURGABLE_VOLATILE ||
1587 mem->object->purgable == VM_PURGABLE_EMPTY)) {
1588 assert(! VM_PAGE_WIRED(mem));
1589 /* less volatile bytes */
1590 ledger_debit(owner->ledger,
1591 task_ledgers.purgeable_volatile,
1592 PAGE_SIZE);
1593 }
1594 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1595 if (VM_PAGE_WIRED(mem)) {
1596 assert(vm_page_purgeable_wired_count > 0);
1597 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1598 } else {
1599 assert(vm_page_purgeable_count > 0);
1600 OSAddAtomic(-1, &vm_page_purgeable_count);
1601 }
1602 }
1603 if (mem->object->set_cache_attr == TRUE)
1604 pmap_set_cache_attributes(mem->phys_page, 0);
1605
1606 mem->tabled = FALSE;
1607 mem->object = VM_OBJECT_NULL;
1608 mem->offset = (vm_object_offset_t) -1;
1609 }
1610
1611
1612 /*
1613 * vm_page_lookup:
1614 *
1615 * Returns the page associated with the object/offset
1616 * pair specified; if none is found, VM_PAGE_NULL is returned.
1617 *
1618 * The object must be locked. No side effects.
1619 */
1620
1621 #define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
1622
1623 #if DEBUG_VM_PAGE_LOOKUP
1624
1625 struct {
1626 uint64_t vpl_total;
1627 uint64_t vpl_empty_obj;
1628 uint64_t vpl_bucket_NULL;
1629 uint64_t vpl_hit_hint;
1630 uint64_t vpl_hit_hint_next;
1631 uint64_t vpl_hit_hint_prev;
1632 uint64_t vpl_fast;
1633 uint64_t vpl_slow;
1634 uint64_t vpl_hit;
1635 uint64_t vpl_miss;
1636
1637 uint64_t vpl_fast_elapsed;
1638 uint64_t vpl_slow_elapsed;
1639 } vm_page_lookup_stats __attribute__((aligned(8)));
1640
1641 #endif
1642
1643 #define KDP_VM_PAGE_WALK_MAX 1000
1644
1645 vm_page_t
1646 kdp_vm_page_lookup(
1647 vm_object_t object,
1648 vm_object_offset_t offset)
1649 {
1650 vm_page_t cur_page;
1651 int num_traversed = 0;
1652
1653 if (not_in_kdp) {
1654 panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
1655 }
1656
1657 queue_iterate(&object->memq, cur_page, vm_page_t, listq) {
1658 if (cur_page->offset == offset) {
1659 return cur_page;
1660 }
1661 num_traversed++;
1662
1663 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
1664 return VM_PAGE_NULL;
1665 }
1666 }
1667
1668 return VM_PAGE_NULL;
1669 }
1670
1671 vm_page_t
1672 vm_page_lookup(
1673 vm_object_t object,
1674 vm_object_offset_t offset)
1675 {
1676 vm_page_t mem;
1677 vm_page_bucket_t *bucket;
1678 queue_entry_t qe;
1679 lck_spin_t *bucket_lock = NULL;
1680 int hash_id;
1681 #if DEBUG_VM_PAGE_LOOKUP
1682 uint64_t start, elapsed;
1683
1684 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
1685 #endif
1686 vm_object_lock_assert_held(object);
1687
1688 if (object->resident_page_count == 0) {
1689 #if DEBUG_VM_PAGE_LOOKUP
1690 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
1691 #endif
1692 return (VM_PAGE_NULL);
1693 }
1694
1695 mem = object->memq_hint;
1696
1697 if (mem != VM_PAGE_NULL) {
1698 assert(mem->object == object);
1699
1700 if (mem->offset == offset) {
1701 #if DEBUG_VM_PAGE_LOOKUP
1702 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
1703 #endif
1704 return (mem);
1705 }
1706 qe = queue_next(&mem->listq);
1707
1708 if (! queue_end(&object->memq, qe)) {
1709 vm_page_t next_page;
1710
1711 next_page = (vm_page_t) qe;
1712 assert(next_page->object == object);
1713
1714 if (next_page->offset == offset) {
1715 object->memq_hint = next_page; /* new hint */
1716 #if DEBUG_VM_PAGE_LOOKUP
1717 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
1718 #endif
1719 return (next_page);
1720 }
1721 }
1722 qe = queue_prev(&mem->listq);
1723
1724 if (! queue_end(&object->memq, qe)) {
1725 vm_page_t prev_page;
1726
1727 prev_page = (vm_page_t) qe;
1728 assert(prev_page->object == object);
1729
1730 if (prev_page->offset == offset) {
1731 object->memq_hint = prev_page; /* new hint */
1732 #if DEBUG_VM_PAGE_LOOKUP
1733 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
1734 #endif
1735 return (prev_page);
1736 }
1737 }
1738 }
1739 /*
1740 * Search the hash table for this object/offset pair
1741 */
1742 hash_id = vm_page_hash(object, offset);
1743 bucket = &vm_page_buckets[hash_id];
1744
1745 /*
1746 * since we hold the object lock, we are guaranteed that no
1747 * new pages can be inserted into this object... this in turn
1748 * guarantees that the page we're looking for can't exist
1749 * if the bucket it hashes to is currently NULL even when looked
1750 * at outside the scope of the hash bucket lock... this is a
1751 * really cheap optimization to avoid taking the lock
1752 */
1753 if (!bucket->page_list) {
1754 #if DEBUG_VM_PAGE_LOOKUP
1755 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
1756 #endif
1757 return (VM_PAGE_NULL);
1758 }
1759
1760 #if DEBUG_VM_PAGE_LOOKUP
1761 start = mach_absolute_time();
1762 #endif
1763 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
1764 /*
1765 * on average, it's roughly 3 times faster to run a short memq list
1766 * than to take the spin lock and go through the hash list
1767 */
1768 mem = (vm_page_t)queue_first(&object->memq);
1769
1770 while (!queue_end(&object->memq, (queue_entry_t)mem)) {
1771
1772 if (mem->offset == offset)
1773 break;
1774
1775 mem = (vm_page_t)queue_next(&mem->listq);
1776 }
1777 if (queue_end(&object->memq, (queue_entry_t)mem))
1778 mem = NULL;
1779 } else {
1780
1781 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1782
1783 lck_spin_lock(bucket_lock);
1784
1785 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
1786 #if 0
1787 /*
1788 * we don't hold the page queue lock
1789 * so this check isn't safe to make
1790 */
1791 VM_PAGE_CHECK(mem);
1792 #endif
1793 if ((mem->object == object) && (mem->offset == offset))
1794 break;
1795 }
1796 lck_spin_unlock(bucket_lock);
1797 }
1798
1799 #if DEBUG_VM_PAGE_LOOKUP
1800 elapsed = mach_absolute_time() - start;
1801
1802 if (bucket_lock) {
1803 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
1804 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
1805 } else {
1806 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
1807 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
1808 }
1809 if (mem != VM_PAGE_NULL)
1810 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
1811 else
1812 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
1813 #endif
1814 if (mem != VM_PAGE_NULL) {
1815 assert(mem->object == object);
1816
1817 object->memq_hint = mem;
1818 }
1819 return (mem);
1820 }
1821
1822
1823 /*
1824 * vm_page_rename:
1825 *
1826 * Move the given memory entry from its
1827 * current object to the specified target object/offset.
1828 *
1829 * The object must be locked.
1830 */
1831 void
1832 vm_page_rename(
1833 register vm_page_t mem,
1834 register vm_object_t new_object,
1835 vm_object_offset_t new_offset,
1836 boolean_t encrypted_ok)
1837 {
1838 boolean_t internal_to_external, external_to_internal;
1839 vm_tag_t tag;
1840
1841 assert(mem->object != new_object);
1842
1843 assert(mem->object);
1844
1845 /*
1846 * ENCRYPTED SWAP:
1847 * The encryption key is based on the page's memory object
1848 * (aka "pager") and paging offset. Moving the page to
1849 * another VM object changes its "pager" and "paging_offset"
1850 * so it has to be decrypted first, or we would lose the key.
1851 *
1852 * One exception is VM object collapsing, where we transfer pages
1853 * from one backing object to its parent object. This operation also
1854 * transfers the paging information, so the <pager,paging_offset> info
1855 * should remain consistent. The caller (vm_object_do_collapse())
1856 * sets "encrypted_ok" in this case.
1857 */
1858 if (!encrypted_ok && mem->encrypted) {
1859 panic("vm_page_rename: page %p is encrypted\n", mem);
1860 }
1861
1862 XPR(XPR_VM_PAGE,
1863 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1864 new_object, new_offset,
1865 mem, 0,0);
1866
1867 /*
1868 * Changes to mem->object require the page queues lock because
1869 * the pageout daemon uses that lock to get the object.
1870 */
1871 vm_page_lockspin_queues();
1872
1873 internal_to_external = FALSE;
1874 external_to_internal = FALSE;
1875
1876 if (mem->local) {
1877 /*
1878 * it's much easier to get the vm_page_pageable_xxx accounting correct
1879 * if we first move the page to the active queue... it's going to end
1880 * up there anyway, and we don't call vm_page_rename() frequently enough
1881 * for this to matter.
1882 */
1883 vm_page_queues_remove(mem);
1884 vm_page_activate(mem);
1885 }
1886 if (mem->active || mem->inactive || mem->speculative) {
1887 if (mem->object->internal && !new_object->internal) {
1888 internal_to_external = TRUE;
1889 }
1890 if (!mem->object->internal && new_object->internal) {
1891 external_to_internal = TRUE;
1892 }
1893 }
1894
1895 tag = mem->object->wire_tag;
1896 vm_page_remove(mem, TRUE);
1897 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
1898
1899 if (internal_to_external) {
1900 vm_page_pageable_internal_count--;
1901 vm_page_pageable_external_count++;
1902 } else if (external_to_internal) {
1903 vm_page_pageable_external_count--;
1904 vm_page_pageable_internal_count++;
1905 }
1906
1907 vm_page_unlock_queues();
1908 }
1909
1910 /*
1911 * vm_page_init:
1912 *
1913 * Initialize the fields in a new page.
1914 * This takes a structure with arbitrary (uninitialized) contents and initializes it
1915 * so that it can be given to vm_page_release or vm_page_insert.
1916 */
1917 void
1918 vm_page_init(
1919 vm_page_t mem,
1920 ppnum_t phys_page,
1921 boolean_t lopage)
1922 {
1923 assert(phys_page);
1924
1925 #if DEBUG
1926 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1927 if (!(pmap_valid_page(phys_page))) {
1928 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1929 }
1930 }
1931 #endif
1932 *mem = vm_page_template;
1933 mem->phys_page = phys_page;
1934 #if 0
1935 /*
1936 * we're leaving this turned off for now... currently pages
1937 * come off the free list and are either immediately dirtied/referenced
1938 * due to zero-fill or COW faults, or are used to read or write files...
1939 * in the file I/O case, the UPL mechanism takes care of clearing
1940 * the state of the HW ref/mod bits in a somewhat fragile way.
1941 * Since we may change the way this works in the future (to toughen it up),
1942 * I'm leaving this as a reminder of where these bits could get cleared
1943 */
1944
1945 /*
1946 * make sure both the h/w referenced and modified bits are
1947 * clear at this point... we are especially dependent on
1948 * not finding a 'stale' h/w modified in a number of spots
1949 * once this page goes back into use
1950 */
1951 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1952 #endif
1953 mem->lopage = lopage;
1954 }
1955
1956 /*
1957 * vm_page_grab_fictitious:
1958 *
1959 * Remove a fictitious page from the free list.
1960 * Returns VM_PAGE_NULL if there are no free pages.
1961 */
1962 int c_vm_page_grab_fictitious = 0;
1963 int c_vm_page_grab_fictitious_failed = 0;
1964 int c_vm_page_release_fictitious = 0;
1965 int c_vm_page_more_fictitious = 0;
1966
1967 vm_page_t
1968 vm_page_grab_fictitious_common(
1969 ppnum_t phys_addr)
1970 {
1971 vm_page_t m;
1972
1973 if ((m = (vm_page_t)zget(vm_page_zone))) {
1974
1975 vm_page_init(m, phys_addr, FALSE);
1976 m->fictitious = TRUE;
1977
1978 c_vm_page_grab_fictitious++;
1979 } else
1980 c_vm_page_grab_fictitious_failed++;
1981
1982 return m;
1983 }
1984
1985 vm_page_t
1986 vm_page_grab_fictitious(void)
1987 {
1988 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1989 }
1990
1991 vm_page_t
1992 vm_page_grab_guard(void)
1993 {
1994 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1995 }
1996
1997
1998 /*
1999 * vm_page_release_fictitious:
2000 *
2001 * Release a fictitious page to the zone pool
2002 */
2003 void
2004 vm_page_release_fictitious(
2005 vm_page_t m)
2006 {
2007 assert(!m->free);
2008 assert(m->fictitious);
2009 assert(m->phys_page == vm_page_fictitious_addr ||
2010 m->phys_page == vm_page_guard_addr);
2011
2012 c_vm_page_release_fictitious++;
2013
2014 zfree(vm_page_zone, m);
2015 }
2016
2017 /*
2018 * vm_page_more_fictitious:
2019 *
2020 * Add more fictitious pages to the zone.
2021 * Allowed to block. This routine is intimately tied
2022 * to the zones code, for several reasons:
2023 * 1. we need to carve some page structures out of physical
2024 * memory before zones work, so they _cannot_ come from
2025 * the zone_map.
2026 * 2. the zone needs to be collectable in order to prevent
2027 * growth without bound. These structures are used by
2028 * the device pager (by the hundreds and thousands), as
2029 * private pages for pageout, and as blocking pages for
2030 * pagein. Temporary bursts in demand should not result in
2031 * permanent allocation of a resource.
2032 * 3. To smooth allocation humps, we allocate single pages
2033 * with kernel_memory_allocate(), and cram them into the
2034 * zone.
2035 */
2036
2037 void vm_page_more_fictitious(void)
2038 {
2039 vm_offset_t addr;
2040 kern_return_t retval;
2041
2042 c_vm_page_more_fictitious++;
2043
2044 /*
2045 * Allocate a single page from the zone_map. Do not wait if no physical
2046 * pages are immediately available, and do not zero the space. We need
2047 * our own blocking lock here to prevent having multiple,
2048 * simultaneous requests from piling up on the zone_map lock. Exactly
2049 * one (of our) threads should be potentially waiting on the map lock.
2050 * If the winner is not vm-privileged, then the page allocation will fail,
2051 * and it will temporarily block here in the vm_page_wait().
2052 */
2053 lck_mtx_lock(&vm_page_alloc_lock);
2054 /*
2055 * If another thread allocated space, just bail out now.
2056 */
2057 if (zone_free_count(vm_page_zone) > 5) {
2058 /*
2059 * The number "5" is a small number that is larger than the
2060 * number of fictitious pages that any single caller will
2061 * attempt to allocate. Otherwise, a thread will attempt to
2062 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2063 * release all of the resources and locks already acquired,
2064 * and then call this routine. This routine finds the pages
2065 * that the caller released, and so does not allocate new space.
2066 * The process repeats infinitely. The largest known number
2067 * of fictitious pages required in this manner is 2. 5 is
2068 * simply a somewhat larger number.
2069 */
2070 lck_mtx_unlock(&vm_page_alloc_lock);
2071 return;
2072 }
2073
2074 retval = kernel_memory_allocate(zone_map,
2075 &addr, PAGE_SIZE, VM_PROT_ALL,
2076 KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
2077 if (retval != KERN_SUCCESS) {
2078 /*
2079 * No page was available. Drop the
2080 * lock to give another thread a chance at it, and
2081 * wait for the pageout daemon to make progress.
2082 */
2083 lck_mtx_unlock(&vm_page_alloc_lock);
2084 vm_page_wait(THREAD_UNINT);
2085 return;
2086 }
2087
2088 zcram(vm_page_zone, addr, PAGE_SIZE);
2089
2090 lck_mtx_unlock(&vm_page_alloc_lock);
2091 }
2092
2093
2094 /*
2095 * vm_pool_low():
2096 *
2097 * Return true if it is not likely that a non-vm_privileged thread
2098 * can get memory without blocking. Advisory only, since the
2099 * situation may change under us.
2100 */
2101 int
2102 vm_pool_low(void)
2103 {
2104 /* No locking, at worst we will fib. */
2105 return( vm_page_free_count <= vm_page_free_reserved );
2106 }
2107
2108
2109
2110 /*
2111 * this is an interface to support bring-up of drivers
2112 * on platforms with physical memory > 4G...
2113 */
2114 int vm_himemory_mode = 2;
2115
2116
2117 /*
2118 * this interface exists to support hardware controllers
2119 * incapable of generating DMAs with more than 32 bits
2120 * of address on platforms with physical memory > 4G...
2121 */
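/*
 * vm_page_grablo() below serves those controllers: it first tries the
 * dedicated vm_lopage_queue_free list and, if that is empty, falls back
 * to cpm_allocate() bounded by atop(0xffffffff), so only physical pages
 * below 4GB are ever returned on this path.
 */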
2122 unsigned int vm_lopages_allocated_q = 0;
2123 unsigned int vm_lopages_allocated_cpm_success = 0;
2124 unsigned int vm_lopages_allocated_cpm_failed = 0;
2125 queue_head_t vm_lopage_queue_free;
2126
2127 vm_page_t
2128 vm_page_grablo(void)
2129 {
2130 vm_page_t mem;
2131
2132 if (vm_lopage_needed == FALSE)
2133 return (vm_page_grab());
2134
2135 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2136
2137 if ( !queue_empty(&vm_lopage_queue_free)) {
2138 queue_remove_first(&vm_lopage_queue_free,
2139 mem,
2140 vm_page_t,
2141 pageq);
2142 assert(vm_lopage_free_count);
2143
2144 vm_lopage_free_count--;
2145 vm_lopages_allocated_q++;
2146
2147 if (vm_lopage_free_count < vm_lopage_lowater)
2148 vm_lopage_refill = TRUE;
2149
2150 lck_mtx_unlock(&vm_page_queue_free_lock);
2151 } else {
2152 lck_mtx_unlock(&vm_page_queue_free_lock);
2153
2154 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
2155
2156 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2157 vm_lopages_allocated_cpm_failed++;
2158 lck_mtx_unlock(&vm_page_queue_free_lock);
2159
2160 return (VM_PAGE_NULL);
2161 }
2162 mem->busy = TRUE;
2163
2164 vm_page_lockspin_queues();
2165
2166 mem->gobbled = FALSE;
2167 vm_page_gobble_count--;
2168 vm_page_wire_count--;
2169
2170 vm_lopages_allocated_cpm_success++;
2171 vm_page_unlock_queues();
2172 }
2173 assert(mem->busy);
2174 assert(!mem->free);
2175 assert(!mem->pmapped);
2176 assert(!mem->wpmapped);
2177 assert(!pmap_is_noencrypt(mem->phys_page));
2178
2179 mem->pageq.next = NULL;
2180 mem->pageq.prev = NULL;
2181
2182 return (mem);
2183 }
2184
2185
2186 /*
2187 * vm_page_grab:
2188 *
2189 * first try to grab a page from the per-cpu free list...
2190 * this must be done while pre-emption is disabled... if
2191 * a page is available, we're done...
2192 * if no page is available, grab the vm_page_queue_free_lock
2193 * and see if current number of free pages would allow us
2194 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2195 * if there are pages available, disable preemption and
2196 * recheck the state of the per-cpu free list... we could
2197 * have been preempted and moved to a different cpu, or
2198 * some other thread could have re-filled it... if still
2199 * empty, figure out how many pages we can steal from the
2200 * global free queue and move to the per-cpu queue...
2201 * return 1 of these pages when done... only wake up the
2202 * pageout_scan thread if we moved pages from the global
2203 * list... no need for the wakeup if we've satisfied the
2204 * request from the per-cpu queue.
2205 */
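/*
 * for example (illustrative numbers only): with vm_page_free_reserved
 * at 100 and vm_free_magazine_refill_limit at 256, a global free count
 * of 120 lets us steal 120 - 100 = 20 pages for the per-cpu list,
 * while a free count of 1000 caps the refill at 256 pages.
 */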
2206
2207
2208 vm_page_t
2209 vm_page_grab( void )
2210 {
2211 vm_page_t mem;
2212
2213
2214 disable_preemption();
2215
2216 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2217 return_page_from_cpu_list:
2218 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2219 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
2220
2221 enable_preemption();
2222 mem->pageq.next = NULL;
2223
2224 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2225 assert(mem->tabled == FALSE);
2226 assert(mem->object == VM_OBJECT_NULL);
2227 assert(!mem->laundry);
2228 assert(!mem->free);
2229 assert(pmap_verify_free(mem->phys_page));
2230 assert(mem->busy);
2231 assert(!mem->encrypted);
2232 assert(!mem->pmapped);
2233 assert(!mem->wpmapped);
2234 assert(!mem->active);
2235 assert(!mem->inactive);
2236 assert(!mem->throttled);
2237 assert(!mem->speculative);
2238 assert(!pmap_is_noencrypt(mem->phys_page));
2239
2240 return mem;
2241 }
2242 enable_preemption();
2243
2244
2245 /*
2246 * Optionally produce warnings if the wire or gobble
2247 * counts exceed some threshold.
2248 */
2249 #if VM_PAGE_WIRE_COUNT_WARNING
2250 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2251 printf("mk: vm_page_grab(): high wired page count of %d\n",
2252 vm_page_wire_count);
2253 }
2254 #endif
2255 #if VM_PAGE_GOBBLE_COUNT_WARNING
2256 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2257 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2258 vm_page_gobble_count);
2259 }
2260 #endif
2261 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2262
2263 /*
2264 * Only let privileged threads (involved in pageout)
2265 * dip into the reserved pool.
2266 */
2267 if ((vm_page_free_count < vm_page_free_reserved) &&
2268 !(current_thread()->options & TH_OPT_VMPRIV)) {
2269 lck_mtx_unlock(&vm_page_queue_free_lock);
2270 mem = VM_PAGE_NULL;
2271 }
2272 else {
2273 vm_page_t head;
2274 vm_page_t tail;
2275 unsigned int pages_to_steal;
2276 unsigned int color;
2277
2278 while ( vm_page_free_count == 0 ) {
2279
2280 lck_mtx_unlock(&vm_page_queue_free_lock);
2281 /*
2282 * must be a privileged thread to be
2283 * in this state since a non-privileged
2284 * thread would have bailed if we were
2285 * under the vm_page_free_reserved mark
2286 */
2287 VM_PAGE_WAIT();
2288 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2289 }
2290
2291 disable_preemption();
2292
2293 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2294 lck_mtx_unlock(&vm_page_queue_free_lock);
2295
2296 /*
2297 * we got preempted and moved to another processor
2298 * or we got preempted and someone else ran and filled the cache
2299 */
2300 goto return_page_from_cpu_list;
2301 }
2302 if (vm_page_free_count <= vm_page_free_reserved)
2303 pages_to_steal = 1;
2304 else {
2305 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2306 pages_to_steal = vm_free_magazine_refill_limit;
2307 else
2308 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2309 }
2310 color = PROCESSOR_DATA(current_processor(), start_color);
2311 head = tail = NULL;
2312
2313 vm_page_free_count -= pages_to_steal;
2314
2315 while (pages_to_steal--) {
2316
2317 while (queue_empty(&vm_page_queue_free[color]))
2318 color = (color + 1) & vm_color_mask;
2319
2320 queue_remove_first(&vm_page_queue_free[color],
2321 mem,
2322 vm_page_t,
2323 pageq);
2324 mem->pageq.next = NULL;
2325 mem->pageq.prev = NULL;
2326
2327 assert(!mem->active);
2328 assert(!mem->inactive);
2329 assert(!mem->throttled);
2330 assert(!mem->speculative);
2331
2332 color = (color + 1) & vm_color_mask;
2333
2334 if (head == NULL)
2335 head = mem;
2336 else
2337 tail->pageq.next = (queue_t)mem;
2338 tail = mem;
2339
2340 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2341 assert(mem->tabled == FALSE);
2342 assert(mem->object == VM_OBJECT_NULL);
2343 assert(!mem->laundry);
2344 assert(mem->free);
2345 mem->free = FALSE;
2346
2347 assert(pmap_verify_free(mem->phys_page));
2348 assert(mem->busy);
2349 assert(!mem->free);
2350 assert(!mem->encrypted);
2351 assert(!mem->pmapped);
2352 assert(!mem->wpmapped);
2353 assert(!pmap_is_noencrypt(mem->phys_page));
2354 }
2355 lck_mtx_unlock(&vm_page_queue_free_lock);
2356
2357 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2358 PROCESSOR_DATA(current_processor(), start_color) = color;
2359
2360 /*
2361 * satisfy this request
2362 */
2363 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2364 mem = head;
2365 mem->pageq.next = NULL;
2366
2367 enable_preemption();
2368 }
2369 /*
2370 * Decide if we should poke the pageout daemon.
2371 * We do this if the free count is less than the low
2372 * water mark, or if the free count is less than the high
2373 * water mark (but above the low water mark) and the inactive
2374 * count is less than its target.
2375 *
2376 * We don't have the counts locked ... if they change a little,
2377 * it doesn't really matter.
2378 */
2379 if ((vm_page_free_count < vm_page_free_min) ||
2380 ((vm_page_free_count < vm_page_free_target) &&
2381 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2382 thread_wakeup((event_t) &vm_page_free_wanted);
2383
2384 VM_CHECK_MEMORYSTATUS;
2385
2386 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2387
2388 return mem;
2389 }
2390
2391 /*
2392 * vm_page_release:
2393 *
2394 * Return a page to the free list.
2395 */
2396
2397 void
2398 vm_page_release(
2399 register vm_page_t mem)
2400 {
2401 unsigned int color;
2402 int need_wakeup = 0;
2403 int need_priv_wakeup = 0;
2404
2405
2406 assert(!mem->private && !mem->fictitious);
2407 if (vm_page_free_verify) {
2408 assert(pmap_verify_free(mem->phys_page));
2409 }
2410 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2411
2412 pmap_clear_noencrypt(mem->phys_page);
2413
2414 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2415 #if DEBUG
2416 if (mem->free)
2417 panic("vm_page_release");
2418 #endif
2419
2420 assert(mem->busy);
2421 assert(!mem->laundry);
2422 assert(mem->object == VM_OBJECT_NULL);
2423 assert(mem->pageq.next == NULL &&
2424 mem->pageq.prev == NULL);
2425 assert(mem->listq.next == NULL &&
2426 mem->listq.prev == NULL);
2427
2428 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2429 vm_lopage_free_count < vm_lopage_free_limit &&
2430 mem->phys_page < max_valid_low_ppnum) {
2431 /*
2432 * this exists to support hardware controllers
2433 * incapable of generating DMAs with more than 32 bits
2434 * of address on platforms with physical memory > 4G...
2435 */
2436 queue_enter_first(&vm_lopage_queue_free,
2437 mem,
2438 vm_page_t,
2439 pageq);
2440 vm_lopage_free_count++;
2441
2442 if (vm_lopage_free_count >= vm_lopage_free_limit)
2443 vm_lopage_refill = FALSE;
2444
2445 mem->lopage = TRUE;
2446 } else {
2447 mem->lopage = FALSE;
2448 mem->free = TRUE;
2449
2450 color = mem->phys_page & vm_color_mask;
2451 queue_enter_first(&vm_page_queue_free[color],
2452 mem,
2453 vm_page_t,
2454 pageq);
2455 vm_page_free_count++;
2456 /*
2457 * Check if we should wake up someone waiting for page.
2458 * But don't bother waking them unless they can allocate.
2459 *
2460 * We wakeup only one thread, to prevent starvation.
2461 * Because the scheduling system handles wait queues FIFO,
2462 * if we wakeup all waiting threads, one greedy thread
2463 * can starve multiple niceguy threads. When the threads
2464 * all wake up, the greedy thread runs first, grabs the page,
2465 * and waits for another page. It will be the first to run
2466 * when the next page is freed.
2467 *
2468 * However, there is a slight danger here.
2469 * The thread we wake might not use the free page.
2470 * Then the other threads could wait indefinitely
2471 * while the page goes unused. To forestall this,
2472 * the pageout daemon will keep making free pages
2473 * as long as vm_page_free_wanted is non-zero.
2474 */
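/*
 * note that we only record which wakeup is needed here; the actual
 * thread_wakeup_one() call is issued after vm_page_queue_free_lock
 * has been dropped, at the end of this function.
 */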
2475
2476 assert(vm_page_free_count > 0);
2477 if (vm_page_free_wanted_privileged > 0) {
2478 vm_page_free_wanted_privileged--;
2479 need_priv_wakeup = 1;
2480 } else if (vm_page_free_wanted > 0 &&
2481 vm_page_free_count > vm_page_free_reserved) {
2482 vm_page_free_wanted--;
2483 need_wakeup = 1;
2484 }
2485 }
2486 lck_mtx_unlock(&vm_page_queue_free_lock);
2487
2488 if (need_priv_wakeup)
2489 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2490 else if (need_wakeup)
2491 thread_wakeup_one((event_t) &vm_page_free_count);
2492
2493 VM_CHECK_MEMORYSTATUS;
2494 }
2495
2496 /*
2497 * This version of vm_page_release() is used only at startup
2498 * when we are single-threaded and pages are being released
2499 * for the first time. Hence, no locking is done and unneeded checks are skipped.
2500 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
2501 */
2502 void
2503 vm_page_release_startup(
2504 register vm_page_t mem)
2505 {
2506 queue_t queue_free;
2507
2508 if (vm_lopage_free_count < vm_lopage_free_limit &&
2509 mem->phys_page < max_valid_low_ppnum) {
2510 mem->lopage = TRUE;
2511 vm_lopage_free_count++;
2512 queue_free = &vm_lopage_queue_free;
2513 } else {
2514 mem->lopage = FALSE;
2515 mem->free = TRUE;
2516 vm_page_free_count++;
2517 queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
2518 }
2519 queue_enter_first(queue_free, mem, vm_page_t, pageq);
2520 }
2521
2522 /*
2523 * vm_page_wait:
2524 *
2525 * Wait for a page to become available.
2526 * If there are plenty of free pages, then we don't sleep.
2527 *
2528 * Returns:
2529 * TRUE: There may be another page, try again
2530 * FALSE: We were interrupted out of our wait, don't try again
2531 */
2532
2533 boolean_t
2534 vm_page_wait(
2535 int interruptible )
2536 {
2537 /*
2538 * We can't use vm_page_free_reserved to make this
2539 * determination. Consider: some thread might
2540 * need to allocate two pages. The first allocation
2541 * succeeds, the second fails. After the first page is freed,
2542 * a call to vm_page_wait must really block.
2543 */
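/*
 * that is why the test below compares against vm_page_free_target,
 * a much higher watermark than vm_page_free_reserved: a waiter only
 * skips the sleep when the pool is comfortably replenished.
 */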
2544 kern_return_t wait_result;
2545 int need_wakeup = 0;
2546 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2547
2548 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2549
2550 if (is_privileged && vm_page_free_count) {
2551 lck_mtx_unlock(&vm_page_queue_free_lock);
2552 return TRUE;
2553 }
2554 if (vm_page_free_count < vm_page_free_target) {
2555
2556 if (is_privileged) {
2557 if (vm_page_free_wanted_privileged++ == 0)
2558 need_wakeup = 1;
2559 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2560 } else {
2561 if (vm_page_free_wanted++ == 0)
2562 need_wakeup = 1;
2563 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2564 }
2565 lck_mtx_unlock(&vm_page_queue_free_lock);
2566 counter(c_vm_page_wait_block++);
2567
2568 if (need_wakeup)
2569 thread_wakeup((event_t)&vm_page_free_wanted);
2570
2571 if (wait_result == THREAD_WAITING) {
2572 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2573 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
2574 wait_result = thread_block(THREAD_CONTINUE_NULL);
2575 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2576 }
2577
2578 return(wait_result == THREAD_AWAKENED);
2579 } else {
2580 lck_mtx_unlock(&vm_page_queue_free_lock);
2581 return TRUE;
2582 }
2583 }
2584
2585 /*
2586 * vm_page_alloc:
2587 *
2588 * Allocate and return a memory cell associated
2589 * with this VM object/offset pair.
2590 *
2591 * Object must be locked.
2592 */
2593
2594 vm_page_t
2595 vm_page_alloc(
2596 vm_object_t object,
2597 vm_object_offset_t offset)
2598 {
2599 register vm_page_t mem;
2600
2601 vm_object_lock_assert_exclusive(object);
2602 mem = vm_page_grab();
2603 if (mem == VM_PAGE_NULL)
2604 return VM_PAGE_NULL;
2605
2606 vm_page_insert(mem, object, offset);
2607
2608 return(mem);
2609 }
2610
2611 /*
2612 * vm_page_alloc_guard:
2613 *
2614 * Allocate a fictitious page which will be used
2615 * as a guard page. The page will be inserted into
2616 * the object and returned to the caller.
2617 */
2618
2619 vm_page_t
2620 vm_page_alloc_guard(
2621 vm_object_t object,
2622 vm_object_offset_t offset)
2623 {
2624 register vm_page_t mem;
2625
2626 vm_object_lock_assert_exclusive(object);
2627 mem = vm_page_grab_guard();
2628 if (mem == VM_PAGE_NULL)
2629 return VM_PAGE_NULL;
2630
2631 vm_page_insert(mem, object, offset);
2632
2633 return(mem);
2634 }
2635
2636
2637 counter(unsigned int c_laundry_pages_freed = 0;)
2638
2639 /*
2640 * vm_page_free_prepare:
2641 *
2642 * Removes page from any queue it may be on
2643 * and disassociates it from its VM object.
2644 *
2645 * Object and page queues must be locked prior to entry.
2646 */
2647 static void
2648 vm_page_free_prepare(
2649 vm_page_t mem)
2650 {
2651 vm_page_free_prepare_queues(mem);
2652 vm_page_free_prepare_object(mem, TRUE);
2653 }
2654
2655
2656 void
2657 vm_page_free_prepare_queues(
2658 vm_page_t mem)
2659 {
2660 VM_PAGE_CHECK(mem);
2661 assert(!mem->free);
2662 assert(!mem->cleaning);
2663
2664 #if MACH_ASSERT || DEBUG
2665 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2666 if (mem->free)
2667 panic("vm_page_free: freeing page on free list\n");
2668 #endif /* MACH_ASSERT || DEBUG */
2669 if (mem->object) {
2670 vm_object_lock_assert_exclusive(mem->object);
2671 }
2672 if (mem->laundry) {
2673 /*
2674 * We may have to free a page while it's being laundered
2675 * if we lost its pager (due to a forced unmount, for example).
2676 * We need to call vm_pageout_steal_laundry() before removing
2677 * the page from its VM object, so that we can remove it
2678 * from its pageout queue and adjust the laundry accounting
2679 */
2680 vm_pageout_steal_laundry(mem, TRUE);
2681 counter(++c_laundry_pages_freed);
2682 }
2683
2684 vm_page_queues_remove(mem); /* clears local/active/inactive/throttled/speculative */
2685
2686 if (VM_PAGE_WIRED(mem)) {
2687 if (mem->object) {
2688 assert(mem->object->wired_page_count > 0);
2689 mem->object->wired_page_count--;
2690 if (!mem->object->wired_page_count) {
2691 VM_OBJECT_UNWIRED(mem->object);
2692 }
2693
2694 assert(mem->object->resident_page_count >=
2695 mem->object->wired_page_count);
2696
2697 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2698 OSAddAtomic(+1, &vm_page_purgeable_count);
2699 assert(vm_page_purgeable_wired_count > 0);
2700 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2701 }
2702 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2703 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2704 mem->object->vo_purgeable_owner != TASK_NULL) {
2705 task_t owner;
2706
2707 owner = mem->object->vo_purgeable_owner;
2708 /*
2709 * While wired, this page was accounted
2710 * as "non-volatile" but it should now
2711 * be accounted as "volatile".
2712 */
2713 /* one less "non-volatile"... */
2714 ledger_debit(owner->ledger,
2715 task_ledgers.purgeable_nonvolatile,
2716 PAGE_SIZE);
2717 /* ... and "phys_footprint" */
2718 ledger_debit(owner->ledger,
2719 task_ledgers.phys_footprint,
2720 PAGE_SIZE);
2721 /* one more "volatile" */
2722 ledger_credit(owner->ledger,
2723 task_ledgers.purgeable_volatile,
2724 PAGE_SIZE);
2725 }
2726 }
2727 if (!mem->private && !mem->fictitious)
2728 vm_page_wire_count--;
2729 mem->wire_count = 0;
2730 assert(!mem->gobbled);
2731 } else if (mem->gobbled) {
2732 if (!mem->private && !mem->fictitious)
2733 vm_page_wire_count--;
2734 vm_page_gobble_count--;
2735 }
2736 }
2737
2738
2739 void
2740 vm_page_free_prepare_object(
2741 vm_page_t mem,
2742 boolean_t remove_from_hash)
2743 {
2744 if (mem->tabled)
2745 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2746
2747 PAGE_WAKEUP(mem); /* clears wanted */
2748
2749 if (mem->private) {
2750 mem->private = FALSE;
2751 mem->fictitious = TRUE;
2752 mem->phys_page = vm_page_fictitious_addr;
2753 }
2754 if ( !mem->fictitious) {
2755 vm_page_init(mem, mem->phys_page, mem->lopage);
2756 }
2757 }
2758
2759
2760 /*
2761 * vm_page_free:
2762 *
2763 * Returns the given page to the free list,
2764 * disassociating it from any VM object.
2765 *
2766 * Object and page queues must be locked prior to entry.
2767 */
2768 void
2769 vm_page_free(
2770 vm_page_t mem)
2771 {
2772 vm_page_free_prepare(mem);
2773
2774 if (mem->fictitious) {
2775 vm_page_release_fictitious(mem);
2776 } else {
2777 vm_page_release(mem);
2778 }
2779 }
2780
2781
2782 void
2783 vm_page_free_unlocked(
2784 vm_page_t mem,
2785 boolean_t remove_from_hash)
2786 {
2787 vm_page_lockspin_queues();
2788 vm_page_free_prepare_queues(mem);
2789 vm_page_unlock_queues();
2790
2791 vm_page_free_prepare_object(mem, remove_from_hash);
2792
2793 if (mem->fictitious) {
2794 vm_page_release_fictitious(mem);
2795 } else {
2796 vm_page_release(mem);
2797 }
2798 }
2799
2800
2801 /*
2802 * Free a list of pages. The list can be up to several hundred pages,
2803 * as blocked up by vm_pageout_scan().
2804 * The big win is not having to take the free list lock once
2805 * per page.
2806 */
2807 void
2808 vm_page_free_list(
2809 vm_page_t freeq,
2810 boolean_t prepare_object)
2811 {
2812 vm_page_t mem;
2813 vm_page_t nxt;
2814 vm_page_t local_freeq;
2815 int pg_count;
2816
2817 while (freeq) {
2818
2819 pg_count = 0;
2820 local_freeq = VM_PAGE_NULL;
2821 mem = freeq;
2822
2823 /*
2824 * break up the processing into smaller chunks so
2825 * that we can 'pipeline' the pages onto the
2826 * free list w/o introducing too much
2827 * contention on the global free queue lock
2828 */
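/*
 * e.g. freeing a 1000 page list takes the global free-queue lock
 * roughly 1000 / 64 = ~16 times instead of once per page.
 */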
2829 while (mem && pg_count < 64) {
2830
2831 assert(!mem->inactive);
2832 assert(!mem->active);
2833 assert(!mem->throttled);
2834 assert(!mem->free);
2835 assert(!mem->speculative);
2836 assert(!VM_PAGE_WIRED(mem));
2837 assert(mem->pageq.prev == NULL);
2838
2839 nxt = (vm_page_t)(mem->pageq.next);
2840
2841 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2842 assert(pmap_verify_free(mem->phys_page));
2843 }
2844 if (prepare_object == TRUE)
2845 vm_page_free_prepare_object(mem, TRUE);
2846
2847 if (!mem->fictitious) {
2848 assert(mem->busy);
2849
2850 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2851 vm_lopage_free_count < vm_lopage_free_limit &&
2852 mem->phys_page < max_valid_low_ppnum) {
2853 mem->pageq.next = NULL;
2854 vm_page_release(mem);
2855 } else {
2856 /*
2857 * IMPORTANT: we can't set the page "free" here
2858 * because that would make the page eligible for
2859 * a physically-contiguous allocation (see
2860 * vm_page_find_contiguous()) right away (we don't
2861 * hold the vm_page_queue_free lock). That would
2862 * cause trouble because the page is not actually
2863 * in the free queue yet...
2864 */
2865 mem->pageq.next = (queue_entry_t)local_freeq;
2866 local_freeq = mem;
2867 pg_count++;
2868
2869 pmap_clear_noencrypt(mem->phys_page);
2870 }
2871 } else {
2872 assert(mem->phys_page == vm_page_fictitious_addr ||
2873 mem->phys_page == vm_page_guard_addr);
2874 vm_page_release_fictitious(mem);
2875 }
2876 mem = nxt;
2877 }
2878 freeq = mem;
2879
2880 if ( (mem = local_freeq) ) {
2881 unsigned int avail_free_count;
2882 unsigned int need_wakeup = 0;
2883 unsigned int need_priv_wakeup = 0;
2884
2885 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2886
2887 while (mem) {
2888 int color;
2889
2890 nxt = (vm_page_t)(mem->pageq.next);
2891
2892 assert(!mem->free);
2893 assert(mem->busy);
2894 mem->free = TRUE;
2895
2896 color = mem->phys_page & vm_color_mask;
2897 queue_enter_first(&vm_page_queue_free[color],
2898 mem,
2899 vm_page_t,
2900 pageq);
2901 mem = nxt;
2902 }
2903 vm_page_free_count += pg_count;
2904 avail_free_count = vm_page_free_count;
2905
2906 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2907
2908 if (avail_free_count < vm_page_free_wanted_privileged) {
2909 need_priv_wakeup = avail_free_count;
2910 vm_page_free_wanted_privileged -= avail_free_count;
2911 avail_free_count = 0;
2912 } else {
2913 need_priv_wakeup = vm_page_free_wanted_privileged;
2914 vm_page_free_wanted_privileged = 0;
2915 avail_free_count -= need_priv_wakeup; /* i.e. the old vm_page_free_wanted_privileged */
2916 }
2917 }
2918 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2919 unsigned int available_pages;
2920
2921 available_pages = avail_free_count - vm_page_free_reserved;
2922
2923 if (available_pages >= vm_page_free_wanted) {
2924 need_wakeup = vm_page_free_wanted;
2925 vm_page_free_wanted = 0;
2926 } else {
2927 need_wakeup = available_pages;
2928 vm_page_free_wanted -= available_pages;
2929 }
2930 }
2931 lck_mtx_unlock(&vm_page_queue_free_lock);
2932
2933 if (need_priv_wakeup != 0) {
2934 /*
2935 * There shouldn't be that many VM-privileged threads,
2936 * so let's wake them all up, even if we don't quite
2937 * have enough pages to satisfy them all.
2938 */
2939 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2940 }
2941 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2942 /*
2943 * We don't expect to have any more waiters
2944 * after this, so let's wake them all up at
2945 * once.
2946 */
2947 thread_wakeup((event_t) &vm_page_free_count);
2948 } else for (; need_wakeup != 0; need_wakeup--) {
2949 /*
2950 * Wake up one waiter per page we just released.
2951 */
2952 thread_wakeup_one((event_t) &vm_page_free_count);
2953 }
2954
2955 VM_CHECK_MEMORYSTATUS;
2956 }
2957 }
2958 }
2959
2960
2961 /*
2962 * vm_page_wire:
2963 *
2964 * Mark this page as wired down by yet
2965 * another map, removing it from paging queues
2966 * as necessary.
2967 *
2968 * The page's object and the page queues must be locked.
2969 */
2970
2971
2972 void
2973 vm_page_wire(
2974 register vm_page_t mem,
2975 vm_tag_t tag,
2976 boolean_t check_memorystatus)
2977 {
2978
2979 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2980
2981 VM_PAGE_CHECK(mem);
2982 if (mem->object) {
2983 vm_object_lock_assert_exclusive(mem->object);
2984 } else {
2985 /*
2986 * In theory, the page should be in an object before it
2987 * gets wired, since we need to hold the object lock
2988 * to update some fields in the page structure.
2989 * However, some code (i386 pmap, for example) might want
2990 * to wire a page before it gets inserted into an object.
2991 * That's somewhat OK, as long as nobody else can get to
2992 * that page and update it at the same time.
2993 */
2994 }
2995 #if DEBUG
2996 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2997 #endif
2998 if ( !VM_PAGE_WIRED(mem)) {
2999
3000 if (mem->pageout_queue) {
3001 mem->pageout = FALSE;
3002 vm_pageout_throttle_up(mem);
3003 }
3004 vm_page_queues_remove(mem);
3005
3006 if (mem->object) {
3007
3008 if (!mem->private && !mem->fictitious)
3009 {
3010 if (!mem->object->wired_page_count)
3011 {
3012 assert(VM_KERN_MEMORY_NONE != tag);
3013 mem->object->wire_tag = tag;
3014 VM_OBJECT_WIRED(mem->object);
3015 }
3016 }
3017 mem->object->wired_page_count++;
3018
3019 assert(mem->object->resident_page_count >=
3020 mem->object->wired_page_count);
3021 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
3022 assert(vm_page_purgeable_count > 0);
3023 OSAddAtomic(-1, &vm_page_purgeable_count);
3024 OSAddAtomic(1, &vm_page_purgeable_wired_count);
3025 }
3026 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
3027 mem->object->purgable == VM_PURGABLE_EMPTY) &&
3028 mem->object->vo_purgeable_owner != TASK_NULL) {
3029 task_t owner;
3030
3031 owner = mem->object->vo_purgeable_owner;
3032 /* less volatile bytes */
3033 ledger_debit(owner->ledger,
3034 task_ledgers.purgeable_volatile,
3035 PAGE_SIZE);
3036 /* more not-quite-volatile bytes */
3037 ledger_credit(owner->ledger,
3038 task_ledgers.purgeable_nonvolatile,
3039 PAGE_SIZE);
3040 /* more footprint */
3041 ledger_credit(owner->ledger,
3042 task_ledgers.phys_footprint,
3043 PAGE_SIZE);
3044 }
3045 if (mem->object->all_reusable) {
3046 /*
3047 * Wired pages are not counted as "re-usable"
3048 * in "all_reusable" VM objects, so nothing
3049 * to do here.
3050 */
3051 } else if (mem->reusable) {
3052 /*
3053 * This page is not "re-usable" when it's
3054 * wired, so adjust its state and the
3055 * accounting.
3056 */
3057 vm_object_reuse_pages(mem->object,
3058 mem->offset,
3059 mem->offset+PAGE_SIZE_64,
3060 FALSE);
3061 }
3062 }
3063 assert(!mem->reusable);
3064
3065 if (!mem->private && !mem->fictitious && !mem->gobbled)
3066 vm_page_wire_count++;
3067 if (mem->gobbled)
3068 vm_page_gobble_count--;
3069 mem->gobbled = FALSE;
3070
3071 if (check_memorystatus == TRUE) {
3072 VM_CHECK_MEMORYSTATUS;
3073 }
3074 /*
3075 * ENCRYPTED SWAP:
3076 * The page could be encrypted, but
3077 * we don't have to decrypt it here
3078 * because we don't guarantee that the
3079 * data is actually valid at this point.
3080 * The page will get decrypted in
3081 * vm_fault_wire() if needed.
3082 */
3083 }
3084 assert(!mem->gobbled);
3085 mem->wire_count++;
3086 VM_PAGE_CHECK(mem);
3087 }
3088
3089 /*
3090 * vm_page_unwire:
3091 *
3092 * Release one wiring of this page, potentially
3093 * enabling it to be paged again.
3094 *
3095 * The page's object and the page queues must be locked.
3096 */
3097 void
3098 vm_page_unwire(
3099 vm_page_t mem,
3100 boolean_t queueit)
3101 {
3102
3103 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
3104
3105 VM_PAGE_CHECK(mem);
3106 assert(VM_PAGE_WIRED(mem));
3107 assert(!mem->gobbled);
3108 assert(mem->object != VM_OBJECT_NULL);
3109 #if DEBUG
3110 vm_object_lock_assert_exclusive(mem->object);
3111 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3112 #endif
3113 if (--mem->wire_count == 0) {
3114 if (!mem->private && !mem->fictitious) {
3115 vm_page_wire_count--;
3116 }
3117 assert(mem->object->wired_page_count > 0);
3118 mem->object->wired_page_count--;
3119 if (!mem->object->wired_page_count) {
3120 VM_OBJECT_UNWIRED(mem->object);
3121 }
3122 assert(mem->object->resident_page_count >=
3123 mem->object->wired_page_count);
3124 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
3125 OSAddAtomic(+1, &vm_page_purgeable_count);
3126 assert(vm_page_purgeable_wired_count > 0);
3127 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3128 }
3129 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
3130 mem->object->purgable == VM_PURGABLE_EMPTY) &&
3131 mem->object->vo_purgeable_owner != TASK_NULL) {
3132 task_t owner;
3133
3134 owner = mem->object->vo_purgeable_owner;
3135 /* more volatile bytes */
3136 ledger_credit(owner->ledger,
3137 task_ledgers.purgeable_volatile,
3138 PAGE_SIZE);
3139 /* less not-quite-volatile bytes */
3140 ledger_debit(owner->ledger,
3141 task_ledgers.purgeable_nonvolatile,
3142 PAGE_SIZE);
3143 /* less footprint */
3144 ledger_debit(owner->ledger,
3145 task_ledgers.phys_footprint,
3146 PAGE_SIZE);
3147 }
3148 assert(mem->object != kernel_object);
3149 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
3150
3151 if (queueit == TRUE) {
3152 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
3153 vm_page_deactivate(mem);
3154 } else {
3155 vm_page_activate(mem);
3156 }
3157 }
3158
3159 VM_CHECK_MEMORYSTATUS;
3160
3161 }
3162 VM_PAGE_CHECK(mem);
3163 }
3164
3165 /*
3166 * vm_page_deactivate:
3167 *
3168 * Returns the given page to the inactive list,
3169 * indicating that no physical maps have access
3170 * to this page. [Used by the physical mapping system.]
3171 *
3172 * The page queues must be locked.
3173 */
3174 void
3175 vm_page_deactivate(
3176 vm_page_t m)
3177 {
3178 vm_page_deactivate_internal(m, TRUE);
3179 }
3180
3181
3182 void
3183 vm_page_deactivate_internal(
3184 vm_page_t m,
3185 boolean_t clear_hw_reference)
3186 {
3187
3188 VM_PAGE_CHECK(m);
3189 assert(m->object != kernel_object);
3190 assert(m->phys_page != vm_page_guard_addr);
3191
3192 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
3193 #if DEBUG
3194 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3195 #endif
3196 /*
3197 * This page is no longer very interesting. If it was
3198 * interesting (active or inactive/referenced), then we
3199 * clear the reference bit and (re)enter it in the
3200 * inactive queue. Note wired pages should not have
3201 * their reference bit cleared.
3202 */
3203 assert ( !(m->absent && !m->unusual));
3204
3205 if (m->gobbled) { /* can this happen? */
3206 assert( !VM_PAGE_WIRED(m));
3207
3208 if (!m->private && !m->fictitious)
3209 vm_page_wire_count--;
3210 vm_page_gobble_count--;
3211 m->gobbled = FALSE;
3212 }
3213 /*
3214 * if this page is currently on the pageout queue, we can't do the
3215 * vm_page_queues_remove (which doesn't handle the pageout queue case)
3216 * and we can't remove it manually since we would need the object lock
3217 * (which is not required here) to decrement the activity_in_progress
3218 * reference which is held on the object while the page is in the pageout queue...
3219 * just let the normal laundry processing proceed
3220 */
3221 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
3222 return;
3223
3224 if (!m->absent && clear_hw_reference == TRUE)
3225 pmap_clear_reference(m->phys_page);
3226
3227 m->reference = FALSE;
3228 m->no_cache = FALSE;
3229
3230 if (!m->inactive) {
3231 vm_page_queues_remove(m);
3232
3233 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3234 m->dirty && m->object->internal &&
3235 (m->object->purgable == VM_PURGABLE_DENY ||
3236 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3237 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3238 vm_page_check_pageable_safe(m);
3239 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3240 m->throttled = TRUE;
3241 vm_page_throttled_count++;
3242 } else {
3243 if (m->object->named && m->object->ref_count == 1) {
3244 vm_page_speculate(m, FALSE);
3245 #if DEVELOPMENT || DEBUG
3246 vm_page_speculative_recreated++;
3247 #endif
3248 } else {
3249 vm_page_enqueue_inactive(m, FALSE);
3250 }
3251 }
3252 }
3253 }
3254
3255 /*
3256 * vm_page_enqueue_cleaned
3257 *
3258 * Put the page on the cleaned queue, mark it cleaned, etc.
3259 * Being on the cleaned queue (and having m->clean_queue set)
3260 * does ** NOT ** guarantee that the page is clean!
3261 *
3262 * Call with the queues lock held.
3263 */
3264
3265 void vm_page_enqueue_cleaned(vm_page_t m)
3266 {
3267 assert(m->phys_page != vm_page_guard_addr);
3268 #if DEBUG
3269 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3270 #endif
3271 assert( !(m->absent && !m->unusual));
3272
3273 if (m->gobbled) {
3274 assert( !VM_PAGE_WIRED(m));
3275 if (!m->private && !m->fictitious)
3276 vm_page_wire_count--;
3277 vm_page_gobble_count--;
3278 m->gobbled = FALSE;
3279 }
3280 /*
3281 * if this page is currently on the pageout queue, we can't do the
3282 * vm_page_queues_remove (which doesn't handle the pageout queue case)
3283 * and we can't remove it manually since we would need the object lock
3284 * (which is not required here) to decrement the activity_in_progress
3285 * reference which is held on the object while the page is in the pageout queue...
3286 * just let the normal laundry processing proceed
3287 */
3288 if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
3289 return;
3290
3291 vm_page_queues_remove(m);
3292
3293 vm_page_check_pageable_safe(m);
3294 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
3295 m->clean_queue = TRUE;
3296 vm_page_cleaned_count++;
3297
3298 m->inactive = TRUE;
3299 vm_page_inactive_count++;
3300 if (m->object->internal) {
3301 vm_page_pageable_internal_count++;
3302 } else {
3303 vm_page_pageable_external_count++;
3304 }
3305
3306 vm_pageout_enqueued_cleaned++;
3307 }
3308
3309 /*
3310 * vm_page_activate:
3311 *
3312 * Put the specified page on the active list (if appropriate).
3313 *
3314 * The page queues must be locked.
3315 */
3316
3317 void
3318 vm_page_activate(
3319 register vm_page_t m)
3320 {
3321 VM_PAGE_CHECK(m);
3322 #ifdef FIXME_4778297
3323 assert(m->object != kernel_object);
3324 #endif
3325 assert(m->phys_page != vm_page_guard_addr);
3326 #if DEBUG
3327 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3328 #endif
3329 assert( !(m->absent && !m->unusual));
3330
3331 if (m->gobbled) {
3332 assert( !VM_PAGE_WIRED(m));
3333 if (!m->private && !m->fictitious)
3334 vm_page_wire_count--;
3335 vm_page_gobble_count--;
3336 m->gobbled = FALSE;
3337 }
3338 /*
3339 * if this page is currently on the pageout queue, we can't do the
3340 * vm_page_queues_remove (which doesn't handle the pageout queue case)
3341 * and we can't remove it manually since we would need the object lock
3342 * (which is not required here) to decrement the activity_in_progress
3343 * reference which is held on the object while the page is in the pageout queue...
3344 * just let the normal laundry processing proceed
3345 */
3346 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3347 return;
3348
3349 #if DEBUG
3350 if (m->active)
3351 panic("vm_page_activate: already active");
3352 #endif
3353
3354 if (m->speculative) {
3355 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
3356 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
3357 }
3358
3359 vm_page_queues_remove(m);
3360
3361 if ( !VM_PAGE_WIRED(m)) {
3362 vm_page_check_pageable_safe(m);
3363 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3364 m->dirty && m->object->internal &&
3365 (m->object->purgable == VM_PURGABLE_DENY ||
3366 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3367 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3368 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3369 m->throttled = TRUE;
3370 vm_page_throttled_count++;
3371 } else {
3372 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3373 m->active = TRUE;
3374 vm_page_active_count++;
3375 if (m->object->internal) {
3376 vm_page_pageable_internal_count++;
3377 } else {
3378 vm_page_pageable_external_count++;
3379 }
3380 }
3381 m->reference = TRUE;
3382 m->no_cache = FALSE;
3383 }
3384 VM_PAGE_CHECK(m);
3385 }
3386
3387
3388 /*
3389 * vm_page_speculate:
3390 *
3391 * Put the specified page on the speculative list (if appropriate).
3392 *
3393 * The page queues must be locked.
3394 */
3395 void
3396 vm_page_speculate(
3397 vm_page_t m,
3398 boolean_t new)
3399 {
3400 struct vm_speculative_age_q *aq;
3401
3402 VM_PAGE_CHECK(m);
3403 vm_page_check_pageable_safe(m);
3404
3405 assert(m->phys_page != vm_page_guard_addr);
3406 #if DEBUG
3407 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3408 #endif
3409 assert( !(m->absent && !m->unusual));
3410
3411 /*
3412 * if this page is currently on the pageout queue, we can't do the
3413 * vm_page_queues_remove (which doesn't handle the pageout queue case)
3414 * and we can't remove it manually since we would need the object lock
3415 * (which is not required here) to decrement the activity_in_progress
3416 * reference which is held on the object while the page is in the pageout queue...
3417 * just let the normal laundry processing proceed
3418 */
3419 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3420 return;
3421
3422 vm_page_queues_remove(m);
3423
3424 if ( !VM_PAGE_WIRED(m)) {
3425 mach_timespec_t ts;
3426 clock_sec_t sec;
3427 clock_nsec_t nsec;
3428
3429 clock_get_system_nanotime(&sec, &nsec);
3430 ts.tv_sec = (unsigned int) sec;
3431 ts.tv_nsec = nsec;
3432
3433 if (vm_page_speculative_count == 0) {
3434
3435 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3436 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3437
3438 aq = &vm_page_queue_speculative[speculative_age_index];
3439
3440 /*
3441 * set the timer to begin a new group
3442 */
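/*
 * e.g. (illustrative) vm_page_speculative_q_age_ms == 500 yields
 * age_ts = { 0 sec, 500 * 1000 * NSEC_PER_USEC nsec }, i.e. 500ms,
 * which ADD_MACH_TIMESPEC then adds to 'ts', the current system time.
 */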
3443 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3444 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3445
3446 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3447 } else {
3448 aq = &vm_page_queue_speculative[speculative_age_index];
3449
3450 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3451
3452 speculative_age_index++;
3453
3454 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3455 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3456 if (speculative_age_index == speculative_steal_index) {
3457 speculative_steal_index = speculative_age_index + 1;
3458
3459 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3460 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3461 }
3462 aq = &vm_page_queue_speculative[speculative_age_index];
3463
3464 if (!queue_empty(&aq->age_q))
3465 vm_page_speculate_ageit(aq);
3466
3467 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3468 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3469
3470 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3471 }
3472 }
3473 enqueue_tail(&aq->age_q, &m->pageq);
3474 m->speculative = TRUE;
3475 vm_page_speculative_count++;
3476 if (m->object->internal) {
3477 vm_page_pageable_internal_count++;
3478 } else {
3479 vm_page_pageable_external_count++;
3480 }
3481
3482 if (new == TRUE) {
3483 vm_object_lock_assert_exclusive(m->object);
3484
3485 m->object->pages_created++;
3486 #if DEVELOPMENT || DEBUG
3487 vm_page_speculative_created++;
3488 #endif
3489 }
3490 }
3491 VM_PAGE_CHECK(m);
3492 }
3493
3494
3495 /*
3496 * move pages from the specified aging bin to
3497 * the speculative bin that pageout_scan claims from
3498 *
3499 * The page queues must be locked.
3500 */
3501 void
3502 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3503 {
3504 struct vm_speculative_age_q *sq;
3505 vm_page_t t;
3506
3507 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3508
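/*
 * splice the aging bin onto the AGED queue: if the AGED queue is
 * empty, adopt aq's chain outright and point its end links back at
 * sq; otherwise append aq's chain after sq's current tail.
 */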
3509 if (queue_empty(&sq->age_q)) {
3510 sq->age_q.next = aq->age_q.next;
3511 sq->age_q.prev = aq->age_q.prev;
3512
3513 t = (vm_page_t)sq->age_q.next;
3514 t->pageq.prev = &sq->age_q;
3515
3516 t = (vm_page_t)sq->age_q.prev;
3517 t->pageq.next = &sq->age_q;
3518 } else {
3519 t = (vm_page_t)sq->age_q.prev;
3520 t->pageq.next = aq->age_q.next;
3521
3522 t = (vm_page_t)aq->age_q.next;
3523 t->pageq.prev = sq->age_q.prev;
3524
3525 t = (vm_page_t)aq->age_q.prev;
3526 t->pageq.next = &sq->age_q;
3527
3528 sq->age_q.prev = aq->age_q.prev;
3529 }
3530 queue_init(&aq->age_q);
3531 }
3532
3533
3534 void
3535 vm_page_lru(
3536 vm_page_t m)
3537 {
3538 VM_PAGE_CHECK(m);
3539 assert(m->object != kernel_object);
3540 assert(m->phys_page != vm_page_guard_addr);
3541
3542 #if DEBUG
3543 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3544 #endif
3545 /*
3546 * if this page is currently on the pageout queue, we can't do the
3547 * vm_page_queues_remove (which doesn't handle the pageout queue case)
3548 * and we can't remove it manually since we would need the object lock
3549 * (which is not required here) to decrement the activity_in_progress
3550 * reference which is held on the object while the page is in the pageout queue...
3551 * just let the normal laundry processing proceed
3552 */
3553 if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
3554 return;
3555
3556 m->no_cache = FALSE;
3557
3558 vm_page_queues_remove(m);
3559
3560 vm_page_enqueue_inactive(m, FALSE);
3561 }
3562
3563
3564 void
3565 vm_page_reactivate_all_throttled(void)
3566 {
3567 vm_page_t first_throttled, last_throttled;
3568 vm_page_t first_active;
3569 vm_page_t m;
3570 int extra_active_count;
3571 int extra_internal_count, extra_external_count;
3572
3573 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3574 return;
3575
3576 extra_active_count = 0;
3577 extra_internal_count = 0;
3578 extra_external_count = 0;
3579 vm_page_lock_queues();
3580 if (! queue_empty(&vm_page_queue_throttled)) {
3581 /*
3582 * Switch "throttled" pages to "active".
3583 */
3584 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3585 VM_PAGE_CHECK(m);
3586 assert(m->throttled);
3587 assert(!m->active);
3588 assert(!m->inactive);
3589 assert(!m->speculative);
3590 assert(!VM_PAGE_WIRED(m));
3591
3592 extra_active_count++;
3593 if (m->object->internal) {
3594 extra_internal_count++;
3595 } else {
3596 extra_external_count++;
3597 }
3598
3599 m->throttled = FALSE;
3600 m->active = TRUE;
3601 VM_PAGE_CHECK(m);
3602 }
3603
3604 /*
3605 * Transfer the entire throttled queue to the regular LRU page queue.
3606 * We insert it at the head of the active queue, so that these pages
3607 * get re-evaluated by the LRU algorithm first, since they've been
3608 * completely out of it until now.
3609 */
3610 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3611 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3612 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3613 if (queue_empty(&vm_page_queue_active)) {
3614 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3615 } else {
3616 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3617 }
3618 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3619 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3620 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3621
3622 #if DEBUG
3623 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3624 #endif
3625 queue_init(&vm_page_queue_throttled);
3626 /*
3627 * Adjust the global page counts.
3628 */
3629 vm_page_active_count += extra_active_count;
3630 vm_page_pageable_internal_count += extra_internal_count;
3631 vm_page_pageable_external_count += extra_external_count;
3632 vm_page_throttled_count = 0;
3633 }
3634 assert(vm_page_throttled_count == 0);
3635 assert(queue_empty(&vm_page_queue_throttled));
3636 vm_page_unlock_queues();
3637 }
3638
3639
3640 /*
3641 * move pages from the indicated local queue to the global active queue
3642 * it's OK to fail if we're below the hard limit and force == FALSE
3643 * the nolocks == TRUE case is to allow this function to be run on
3644 * the hibernate path
3645 */
3646
3647 void
3648 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3649 {
3650 struct vpl *lq;
3651 vm_page_t first_local, last_local;
3652 vm_page_t first_active;
3653 vm_page_t m;
3654 uint32_t count = 0;
3655
3656 if (vm_page_local_q == NULL)
3657 return;
3658
3659 lq = &vm_page_local_q[lid].vpl_un.vpl;
3660
3661 if (nolocks == FALSE) {
3662 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3663 if ( !vm_page_trylockspin_queues())
3664 return;
3665 } else
3666 vm_page_lockspin_queues();
3667
3668 VPL_LOCK(&lq->vpl_lock);
3669 }
3670 if (lq->vpl_count) {
3671 /*
3672 * Switch "local" pages to "active".
3673 */
3674 assert(!queue_empty(&lq->vpl_queue));
3675
3676 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3677 VM_PAGE_CHECK(m);
3678 vm_page_check_pageable_safe(m);
3679 assert(m->local);
3680 assert(!m->active);
3681 assert(!m->inactive);
3682 assert(!m->speculative);
3683 assert(!VM_PAGE_WIRED(m));
3684 assert(!m->throttled);
3685 assert(!m->fictitious);
3686
3687 if (m->local_id != lid)
3688 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3689
3690 m->local_id = 0;
3691 m->local = FALSE;
3692 m->active = TRUE;
3693 VM_PAGE_CHECK(m);
3694
3695 count++;
3696 }
3697 if (count != lq->vpl_count)
3698 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3699
3700 /*
3701 * Transfer the entire local queue to the regular LRU page queue.
3702 */
3703 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3704 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3705 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3706
3707 if (queue_empty(&vm_page_queue_active)) {
3708 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3709 } else {
3710 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3711 }
3712 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3713 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3714 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3715
3716 queue_init(&lq->vpl_queue);
3717 /*
3718 * Adjust the global page counts.
3719 */
3720 vm_page_active_count += lq->vpl_count;
3721 vm_page_pageable_internal_count += lq->vpl_internal_count;
3722 vm_page_pageable_external_count += lq->vpl_external_count;
3723 lq->vpl_count = 0;
3724 lq->vpl_internal_count = 0;
3725 lq->vpl_external_count = 0;
3726 }
3727 assert(queue_empty(&lq->vpl_queue));
3728
3729 if (nolocks == FALSE) {
3730 VPL_UNLOCK(&lq->vpl_lock);
3731 vm_page_unlock_queues();
3732 }
3733 }
3734
3735 /*
3736 * vm_page_part_zero_fill:
3737 *
3738 * Zero-fill a part of the page.
3739 */
3740 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3741 void
3742 vm_page_part_zero_fill(
3743 vm_page_t m,
3744 vm_offset_t m_pa,
3745 vm_size_t len)
3746 {
3747
3748 #if 0
3749 /*
3750 * we don't hold the page queue lock
3751 * so this check isn't safe to make
3752 */
3753 VM_PAGE_CHECK(m);
3754 #endif
3755
3756 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3757 pmap_zero_part_page(m->phys_page, m_pa, len);
3758 #else
3759 vm_page_t tmp;
3760 while (1) {
3761 tmp = vm_page_grab();
3762 if (tmp == VM_PAGE_NULL) {
3763 vm_page_wait(THREAD_UNINT);
3764 continue;
3765 }
3766 break;
3767 }
3768 vm_page_zero_fill(tmp);
3769 if(m_pa != 0) {
3770 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3771 }
3772 if((m_pa + len) < PAGE_SIZE) {
3773 vm_page_part_copy(m, m_pa + len, tmp,
3774 m_pa + len, PAGE_SIZE - (m_pa + len));
3775 }
3776 vm_page_copy(tmp,m);
3777 VM_PAGE_FREE(tmp);
3778 #endif
3779
3780 }
3781
3782 /*
3783 * vm_page_zero_fill:
3784 *
3785 * Zero-fill the specified page.
3786 */
3787 void
3788 vm_page_zero_fill(
3789 vm_page_t m)
3790 {
3791 XPR(XPR_VM_PAGE,
3792 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3793 m->object, m->offset, m, 0,0);
3794 #if 0
3795 /*
3796 * we don't hold the page queue lock
3797 * so this check isn't safe to make
3798 */
3799 VM_PAGE_CHECK(m);
3800 #endif
3801
3802 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3803 pmap_zero_page(m->phys_page);
3804 }
3805
3806 /*
3807 * vm_page_part_copy:
3808 *
3809 * copy part of one page to another
3810 */
3811
3812 void
3813 vm_page_part_copy(
3814 vm_page_t src_m,
3815 vm_offset_t src_pa,
3816 vm_page_t dst_m,
3817 vm_offset_t dst_pa,
3818 vm_size_t len)
3819 {
3820 #if 0
3821 /*
3822 * we don't hold the page queue lock
3823 * so this check isn't safe to make
3824 */
3825 VM_PAGE_CHECK(src_m);
3826 VM_PAGE_CHECK(dst_m);
3827 #endif
3828 pmap_copy_part_page(src_m->phys_page, src_pa,
3829 dst_m->phys_page, dst_pa, len);
3830 }
3831
3832 /*
3833 * vm_page_copy:
3834 *
3835 * Copy one page to another
3836 *
3837 * ENCRYPTED SWAP:
3838 * The source page should not be encrypted. The caller should
3839 * make sure the page is decrypted first, if necessary.
3840 */
3841
3842 int vm_page_copy_cs_validations = 0;
3843 int vm_page_copy_cs_tainted = 0;
3844
3845 void
3846 vm_page_copy(
3847 vm_page_t src_m,
3848 vm_page_t dest_m)
3849 {
3850 XPR(XPR_VM_PAGE,
3851 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3852 src_m->object, src_m->offset,
3853 dest_m->object, dest_m->offset,
3854 0);
3855 #if 0
3856 /*
3857 * we don't hold the page queue lock
3858 * so this check isn't safe to make
3859 */
3860 VM_PAGE_CHECK(src_m);
3861 VM_PAGE_CHECK(dest_m);
3862 #endif
3863 vm_object_lock_assert_held(src_m->object);
3864
3865 /*
3866 * ENCRYPTED SWAP:
3867 * The source page should not be encrypted at this point.
3868 * The destination page will therefore not contain encrypted
3869 * data after the copy.
3870 */
3871 if (src_m->encrypted) {
3872 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3873 }
3874 dest_m->encrypted = FALSE;
3875
3876 if (src_m->object != VM_OBJECT_NULL &&
3877 src_m->object->code_signed) {
3878 /*
3879 * We're copying a page from a code-signed object.
3880 * Whoever ends up mapping the copy page might care about
3881 * the original page's integrity, so let's validate the
3882 * source page now.
3883 */
3884 vm_page_copy_cs_validations++;
3885 vm_page_validate_cs(src_m);
3886 }
3887
3888 if (vm_page_is_slideable(src_m)) {
3889 boolean_t was_busy = src_m->busy;
3890 src_m->busy = TRUE;
3891 (void) vm_page_slide(src_m, 0);
3892 assert(src_m->busy);
3893 if (!was_busy) {
3894 PAGE_WAKEUP_DONE(src_m);
3895 }
3896 }
3897
3898 /*
3899 * Propagate the cs_tainted bit to the copy page. Do not propagate
3900 * the cs_validated bit.
3901 */
3902 dest_m->cs_tainted = src_m->cs_tainted;
3903 if (dest_m->cs_tainted) {
3904 vm_page_copy_cs_tainted++;
3905 }
3906 dest_m->slid = src_m->slid;
3907 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3908 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3909 }
3910
3911 #if MACH_ASSERT
3912 static void
3913 _vm_page_print(
3914 vm_page_t p)
3915 {
3916 printf("vm_page %p: \n", p);
3917 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3918 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3919 printf(" next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
3920 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3921 printf(" wire_count=%u\n", p->wire_count);
3922
3923 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3924 (p->local ? "" : "!"),
3925 (p->inactive ? "" : "!"),
3926 (p->active ? "" : "!"),
3927 (p->pageout_queue ? "" : "!"),
3928 (p->speculative ? "" : "!"),
3929 (p->laundry ? "" : "!"));
3930 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3931 (p->free ? "" : "!"),
3932 (p->reference ? "" : "!"),
3933 (p->gobbled ? "" : "!"),
3934 (p->private ? "" : "!"),
3935 (p->throttled ? "" : "!"));
3936 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3937 (p->busy ? "" : "!"),
3938 (p->wanted ? "" : "!"),
3939 (p->tabled ? "" : "!"),
3940 (p->fictitious ? "" : "!"),
3941 (p->pmapped ? "" : "!"),
3942 (p->wpmapped ? "" : "!"));
3943 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3944 (p->pageout ? "" : "!"),
3945 (p->absent ? "" : "!"),
3946 (p->error ? "" : "!"),
3947 (p->dirty ? "" : "!"),
3948 (p->cleaning ? "" : "!"),
3949 (p->precious ? "" : "!"),
3950 (p->clustered ? "" : "!"));
3951 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3952 (p->overwriting ? "" : "!"),
3953 (p->restart ? "" : "!"),
3954 (p->unusual ? "" : "!"),
3955 (p->encrypted ? "" : "!"),
3956 (p->encrypted_cleaning ? "" : "!"));
3957 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
3958 (p->cs_validated ? "" : "!"),
3959 (p->cs_tainted ? "" : "!"),
3960 (p->cs_nx ? "" : "!"),
3961 (p->no_cache ? "" : "!"));
3962
3963 printf("phys_page=0x%x\n", p->phys_page);
3964 }
3965
3966 /*
3967 * Check that the list of pages is ordered by
3968 * ascending physical address and has no holes.
3969 */
3970 static int
3971 vm_page_verify_contiguous(
3972 vm_page_t pages,
3973 unsigned int npages)
3974 {
3975 register vm_page_t m;
3976 unsigned int page_count;
3977 vm_offset_t prev_addr;
3978
3979 prev_addr = pages->phys_page;
3980 page_count = 1;
3981 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3982 if (m->phys_page != prev_addr + 1) {
3983 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3984 m, (long)prev_addr, m->phys_page);
3985 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3986 panic("vm_page_verify_contiguous: not contiguous!");
3987 }
3988 prev_addr = m->phys_page;
3989 ++page_count;
3990 }
3991 if (page_count != npages) {
3992 printf("pages %p actual count 0x%x but requested 0x%x\n",
3993 pages, page_count, npages);
3994 panic("vm_page_verify_contiguous: count error");
3995 }
3996 return 1;
3997 }
3998
3999
4000 /*
4001 * Check the free lists for proper length etc.
4002 */
4003 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
4004 static unsigned int
4005 vm_page_verify_free_list(
4006 queue_head_t *vm_page_queue,
4007 unsigned int color,
4008 vm_page_t look_for_page,
4009 boolean_t expect_page)
4010 {
4011 unsigned int npages;
4012 vm_page_t m;
4013 vm_page_t prev_m;
4014 boolean_t found_page;
4015
4016 if (! vm_page_verify_this_free_list_enabled)
4017 return 0;
4018
4019 found_page = FALSE;
4020 npages = 0;
4021 prev_m = (vm_page_t) vm_page_queue;
4022 queue_iterate(vm_page_queue,
4023 m,
4024 vm_page_t,
4025 pageq) {
4026
4027 if (m == look_for_page) {
4028 found_page = TRUE;
4029 }
4030 if ((vm_page_t) m->pageq.prev != prev_m)
4031 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
4032 color, npages, m, m->pageq.prev, prev_m);
4033 if ( ! m->busy )
4034 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
4035 color, npages, m);
4036 if (color != (unsigned int) -1) {
4037 if ((m->phys_page & vm_color_mask) != color)
4038 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
4039 color, npages, m, m->phys_page & vm_color_mask, color);
4040 if ( ! m->free )
4041 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
4042 color, npages, m);
4043 }
4044 ++npages;
4045 prev_m = m;
4046 }
4047 if (look_for_page != VM_PAGE_NULL) {
4048 unsigned int other_color;
4049
4050 if (expect_page && !found_page) {
4051 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
4052 color, npages, look_for_page, look_for_page->phys_page);
4053 _vm_page_print(look_for_page);
4054 for (other_color = 0;
4055 other_color < vm_colors;
4056 other_color++) {
4057 if (other_color == color)
4058 continue;
4059 vm_page_verify_free_list(&vm_page_queue_free[other_color],
4060 other_color, look_for_page, FALSE);
4061 }
4062 if (color == (unsigned int) -1) {
4063 vm_page_verify_free_list(&vm_lopage_queue_free,
4064 (unsigned int) -1, look_for_page, FALSE);
4065 }
4066 panic("vm_page_verify_free_list(color=%u)\n", color);
4067 }
4068 if (!expect_page && found_page) {
4069 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
4070 color, npages, look_for_page, look_for_page->phys_page);
4071 }
4072 }
4073 return npages;
4074 }
4075
4076 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
4077 static void
4078 vm_page_verify_free_lists( void )
4079 {
4080 unsigned int color, npages, nlopages;
4081 boolean_t toggle = TRUE;
4082
4083 if (! vm_page_verify_all_free_lists_enabled)
4084 return;
4085
4086 npages = 0;
4087
4088 lck_mtx_lock(&vm_page_queue_free_lock);
4089
4090 if (vm_page_verify_this_free_list_enabled == TRUE) {
4091 /*
4092 * This variable has been set globally for extra checking of
4093 * each free list Q. Since we didn't set it, we don't own it
4094 * and we shouldn't toggle it.
4095 */
4096 toggle = FALSE;
4097 }
4098
4099 if (toggle == TRUE) {
4100 vm_page_verify_this_free_list_enabled = TRUE;
4101 }
4102
4103 for( color = 0; color < vm_colors; color++ ) {
4104 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
4105 color, VM_PAGE_NULL, FALSE);
4106 }
4107 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
4108 (unsigned int) -1,
4109 VM_PAGE_NULL, FALSE);
4110 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
4111 panic("vm_page_verify_free_lists: "
4112 "npages %u free_count %d nlopages %u lo_free_count %u",
4113 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
4114
4115 if (toggle == TRUE) {
4116 vm_page_verify_this_free_list_enabled = FALSE;
4117 }
4118
4119 lck_mtx_unlock(&vm_page_queue_free_lock);
4120 }
4121
4122 void
4123 vm_page_queues_assert(
4124 vm_page_t mem,
4125 int val)
4126 {
4127 #if DEBUG
4128 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4129 #endif
4130 if (mem->free + mem->active + mem->inactive + mem->speculative +
4131 mem->throttled + mem->pageout_queue > (val)) {
4132 _vm_page_print(mem);
4133 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
4134 }
4135 if (VM_PAGE_WIRED(mem)) {
4136 assert(!mem->active);
4137 assert(!mem->inactive);
4138 assert(!mem->speculative);
4139 assert(!mem->throttled);
4140 assert(!mem->pageout_queue);
4141 }
4142 }
4143 #endif /* MACH_ASSERT */
4144
4145
4146
4147
4148
4149 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4150
4151 /*
4152 * CONTIGUOUS PAGE ALLOCATION
4153 *
4154 * Find a region large enough to contain at least n pages
4155 * of contiguous physical memory.
4156 *
4157 * This is done by traversing the vm_page_t array in a linear fashion...
4158 * we assume that the vm_page_t array has the available physical pages in an
4159 * ordered, ascending list... this is currently true of all our implementations
4160 * and must remain so... there can be 'holes' in the array... we also can
4161 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed,
4162 * which used to happen via 'vm_page_convert'... that function was no longer
4163 * being called and was removed...
4164 *
4165 * The basic flow consists of stabilizing some of the interesting state of
4166 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
4167 * sweep at the beginning of the array looking for pages that meet our criteria
4168 * for a 'stealable' page... currently we are pretty conservative... if the page
4169 * meets these criteria and is physically contiguous to the previous page in the 'run'
4170 * we keep developing it. If we hit a page that doesn't fit, we reset our state
4171 * and start to develop a new run... if at this point we've already considered
4172 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
4173 * and mutex_pause (which will yield the processor), to keep the latency low with
4174 * respect to other threads trying to acquire free pages (or move pages from q to q),
4175 * and then continue from the spot we left off... we only make 1 pass through the
4176 * array. Once we have a 'run' that is long enough, we'll go into the loop
4177 * which steals the pages from the queues they're currently on... pages on the free
4178 * queue can be stolen directly... pages that are on any of the other queues
4179 * must be removed from the object they are tabled on... this requires taking the
4180 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
4181 * or if the state of the page behind the vm_object lock is no longer viable, we'll
4182 * dump the pages we've currently stolen back to the free list, and pick up our
4183 * scan from the point where we aborted the 'current' run.
4184 *
4185 *
4186 * Requirements:
4187 * - neither vm_page_queue nor vm_free_list lock can be held on entry
4188 *
4189 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
4190 *
4191 * Algorithm:
4192 */
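/*
 * Illustrative sketch (not part of the original source; guarded out):
 * a stripped-down model of the linear "run development" described above,
 * operating on a plain ascending array of page frame numbers that may
 * contain holes. It omits the locking, yielding, stealing and substitution
 * work the real code below has to do; find_contig_run() and pfns[] are
 * hypothetical names.
 */
#if 0
static int
find_contig_run(const ppnum_t *pfns, unsigned int count, unsigned int want)
{
	unsigned int	i, npages = 0, start = 0;
	ppnum_t		prevcontaddr = (ppnum_t) -2;	/* never adjacent to pfns[0] */

	for (i = 0; i < count && npages < want; i++) {
		if (pfns[i] != prevcontaddr + 1) {
			/* contiguity broken... restart the run at this page */
			npages = 1;
			start = i;
		} else {
			npages++;
		}
		prevcontaddr = pfns[i];
	}
	return (npages == want) ? (int) start : -1;
}
#endif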
4193
4194 #define MAX_CONSIDERED_BEFORE_YIELD 1000
4195
4196
4197 #define RESET_STATE_OF_RUN() \
4198 MACRO_BEGIN \
4199 prevcontaddr = -2; \
4200 start_pnum = -1; \
4201 free_considered = 0; \
4202 substitute_needed = 0; \
4203 npages = 0; \
4204 MACRO_END
4205
4206 /*
4207 * Can we steal in-use (i.e. not free) pages when searching for
4208 * physically-contiguous pages ?
4209 */
4210 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
4211
4212 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
4213 #if DEBUG
4214 int vm_page_find_contig_debug = 0;
4215 #endif
4216
4217 static vm_page_t
4218 vm_page_find_contiguous(
4219 unsigned int contig_pages,
4220 ppnum_t max_pnum,
4221 ppnum_t pnum_mask,
4222 boolean_t wire,
4223 int flags)
4224 {
4225 vm_page_t m = NULL;
4226 ppnum_t prevcontaddr;
4227 ppnum_t start_pnum;
4228 unsigned int npages, considered, scanned;
4229 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
4230 unsigned int idx_last_contig_page_found = 0;
4231 int free_considered, free_available;
4232 int substitute_needed;
4233 boolean_t wrapped, zone_gc_called = FALSE;
4234 #if DEBUG
4235 clock_sec_t tv_start_sec, tv_end_sec;
4236 clock_usec_t tv_start_usec, tv_end_usec;
4237 #endif
4238
4239 int yielded = 0;
4240 int dumped_run = 0;
4241 int stolen_pages = 0;
4242 int compressed_pages = 0;
4243
4244
4245 if (contig_pages == 0)
4246 return VM_PAGE_NULL;
4247
4248 full_scan_again:
4249
4250 #if MACH_ASSERT
4251 vm_page_verify_free_lists();
4252 #endif
4253 #if DEBUG
4254 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
4255 #endif
4256 PAGE_REPLACEMENT_ALLOWED(TRUE);
4257
4258 vm_page_lock_queues();
4259
4260
4261 lck_mtx_lock(&vm_page_queue_free_lock);
4262
4263 RESET_STATE_OF_RUN();
4264
4265 scanned = 0;
4266 considered = 0;
4267 free_available = vm_page_free_count - vm_page_free_reserved;
4268
4269 wrapped = FALSE;
4270
4271 if(flags & KMA_LOMEM)
4272 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
4273 else
4274 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
4275
4276 orig_last_idx = idx_last_contig_page_found;
4277 last_idx = orig_last_idx;
4278
4279 for (page_idx = last_idx, start_idx = last_idx;
4280 npages < contig_pages && page_idx < vm_pages_count;
4281 page_idx++) {
4282 retry:
4283 if (wrapped &&
4284 npages == 0 &&
4285 page_idx >= orig_last_idx) {
4286 /*
4287 * We're back where we started and we haven't
4288 * found any suitable contiguous range. Let's
4289 * give up.
4290 */
4291 break;
4292 }
4293 scanned++;
4294 m = &vm_pages[page_idx];
4295
4296 assert(!m->fictitious);
4297 assert(!m->private);
4298
4299 if (max_pnum && m->phys_page > max_pnum) {
4300 /* no more low pages... */
4301 break;
4302 }
4303 if (npages == 0 && ((m->phys_page & pnum_mask) != 0)) {
4304 /*
4305 * not aligned
4306 */
4307 RESET_STATE_OF_RUN();
4308
4309 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
4310 m->encrypted_cleaning ||
4311 m->pageout_queue || m->laundry || m->wanted ||
4312 m->cleaning || m->overwriting || m->pageout) {
4313 /*
4314 * page is in a transient state
4315 * or a state we don't want to deal
4316 * with, so don't consider it which
4317 * means starting a new run
4318 */
4319 RESET_STATE_OF_RUN();
4320
4321 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
4322 /*
4323 * page needs to be on one of our queues
4324 * or it needs to belong to the compressor pool
4325 * in order for it to be stable behind the
4326 * locks we hold at this point...
4327 * if not, don't consider it which
4328 * means starting a new run
4329 */
4330 RESET_STATE_OF_RUN();
4331
4332 } else if (!m->free && (!m->tabled || m->busy)) {
4333 /*
4334 * pages on the free list are always 'busy'
4335 * so we couldn't test for 'busy' in the check
4336 * for the transient states... pages that are
4337 * 'free' are never 'tabled', so we also couldn't
4338 * test for 'tabled'. So we check here to make
4339 * sure that a non-free page is not busy and is
4340 * tabled on an object...
4341 * if not, don't consider it which
4342 * means starting a new run
4343 */
4344 RESET_STATE_OF_RUN();
4345
4346 } else {
4347 if (m->phys_page != prevcontaddr + 1) {
4348 if ((m->phys_page & pnum_mask) != 0) {
4349 RESET_STATE_OF_RUN();
4350 goto did_consider;
4351 } else {
4352 npages = 1;
4353 start_idx = page_idx;
4354 start_pnum = m->phys_page;
4355 }
4356 } else {
4357 npages++;
4358 }
4359 prevcontaddr = m->phys_page;
4360
4361 VM_PAGE_CHECK(m);
4362 if (m->free) {
4363 free_considered++;
4364 } else {
4365 /*
4366 * This page is not free.
4367 * If we can't steal used pages,
4368 * we have to give up this run
4369 * and keep looking.
4370 * Otherwise, we might need to
4371 * move the contents of this page
4372 * into a substitute page.
4373 */
4374 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4375 if (m->pmapped || m->dirty || m->precious) {
4376 substitute_needed++;
4377 }
4378 #else
4379 RESET_STATE_OF_RUN();
4380 #endif
4381 }
4382
4383 if ((free_considered + substitute_needed) > free_available) {
4384 /*
4385 * if we let this run continue
4386 * we will end up dropping the vm_page_free_count
4387 * below the reserve limit... we need to abort
4388 * this run, but we can at least re-consider this
4389 * page... thus the jump back to 'retry'
4390 */
4391 RESET_STATE_OF_RUN();
4392
4393 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4394 considered++;
4395 goto retry;
4396 }
4397 /*
4398 * free_available == 0
4399 * so can't consider any free pages... if
4400 * we went to retry in this case, we'd
4401 * get stuck looking at the same page
4402 * w/o making any forward progress
4403 * we also want to take this path if we've already
4404 * reached our limit that controls the lock latency
4405 */
4406 }
4407 }
4408 did_consider:
4409 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
4410
4411 PAGE_REPLACEMENT_ALLOWED(FALSE);
4412
4413 lck_mtx_unlock(&vm_page_queue_free_lock);
4414 vm_page_unlock_queues();
4415
4416 mutex_pause(0);
4417
4418 PAGE_REPLACEMENT_ALLOWED(TRUE);
4419
4420 vm_page_lock_queues();
4421 lck_mtx_lock(&vm_page_queue_free_lock);
4422
4423 RESET_STATE_OF_RUN();
4424 /*
4425 * reset our free page limit since we
4426 * dropped the lock protecting the vm_page_free_queue
4427 */
4428 free_available = vm_page_free_count - vm_page_free_reserved;
4429 considered = 0;
4430
4431 yielded++;
4432
4433 goto retry;
4434 }
4435 considered++;
4436 }
4437 m = VM_PAGE_NULL;
4438
4439 if (npages != contig_pages) {
4440 if (!wrapped) {
4441 /*
4442 * We didn't find a contiguous range but we didn't
4443 * start from the very first page.
4444 * Start again from the very first page.
4445 */
4446 RESET_STATE_OF_RUN();
4447 if( flags & KMA_LOMEM)
4448 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4449 else
4450 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4451 last_idx = 0;
4452 page_idx = last_idx;
4453 wrapped = TRUE;
4454 goto retry;
4455 }
4456 lck_mtx_unlock(&vm_page_queue_free_lock);
4457 } else {
4458 vm_page_t m1;
4459 vm_page_t m2;
4460 unsigned int cur_idx;
4461 unsigned int tmp_start_idx;
4462 vm_object_t locked_object = VM_OBJECT_NULL;
4463 boolean_t abort_run = FALSE;
4464
4465 assert(page_idx - start_idx == contig_pages);
4466
4467 tmp_start_idx = start_idx;
4468
4469 /*
4470 * first pass through to pull the free pages
4471 * off of the free queue and clear their 'free'
4472 * bit, so that in case we need substitute
4473 * pages, we won't grab any of the free pages
4474 * in the run... even in an abort_run case,
4475 * we'll collect all of the free pages in this
4476 * run and return them to the free list
4477 */
4478 while (start_idx < page_idx) {
4479
4480 m1 = &vm_pages[start_idx++];
4481
4482 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4483 assert(m1->free);
4484 #endif
4485
4486 if (m1->free) {
4487 unsigned int color;
4488
4489 color = m1->phys_page & vm_color_mask;
4490 #if MACH_ASSERT
4491 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
4492 #endif
4493 queue_remove(&vm_page_queue_free[color],
4494 m1,
4495 vm_page_t,
4496 pageq);
4497 m1->pageq.next = NULL;
4498 m1->pageq.prev = NULL;
4499 #if MACH_ASSERT
4500 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
4501 #endif
4502 /*
4503 * Clear the "free" bit so that this page
4504 * does not get considered for another
4505 * concurrent physically-contiguous allocation.
4506 */
4507 m1->free = FALSE;
4508 assert(m1->busy);
4509
4510 vm_page_free_count--;
4511 }
4512 }
4513 if( flags & KMA_LOMEM)
4514 vm_page_lomem_find_contiguous_last_idx = page_idx;
4515 else
4516 vm_page_find_contiguous_last_idx = page_idx;
4517
4518 /*
4519 * we can drop the free queue lock at this point since
4520 * we've pulled any 'free' candidates off of the list...
4521 * we need it dropped so that we can do a vm_page_grab
4522 * when substituting for pmapped/dirty pages
4523 */
4524 lck_mtx_unlock(&vm_page_queue_free_lock);
4525
4526 start_idx = tmp_start_idx;
4527 cur_idx = page_idx - 1;
4528
4529 while (start_idx++ < page_idx) {
4530 /*
4531 * must go through the list from back to front
4532 * so that the page list is created in the
4533 * correct order - low -> high phys addresses
4534 */
4535 m1 = &vm_pages[cur_idx--];
4536
4537 assert(!m1->free);
4538
4539 if (m1->object == VM_OBJECT_NULL) {
4540 /*
4541 * page has already been removed from
4542 * the free list in the 1st pass
4543 */
4544 assert(m1->offset == (vm_object_offset_t) -1);
4545 assert(m1->busy);
4546 assert(!m1->wanted);
4547 assert(!m1->laundry);
4548 } else {
4549 vm_object_t object;
4550 int refmod;
4551 boolean_t disconnected, reusable;
4552
4553 if (abort_run == TRUE)
4554 continue;
4555
4556 object = m1->object;
4557
4558 if (object != locked_object) {
4559 if (locked_object) {
4560 vm_object_unlock(locked_object);
4561 locked_object = VM_OBJECT_NULL;
4562 }
4563 if (vm_object_lock_try(object))
4564 locked_object = object;
4565 }
4566 if (locked_object == VM_OBJECT_NULL ||
4567 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4568 m1->encrypted_cleaning ||
4569 m1->pageout_queue || m1->laundry || m1->wanted ||
4570 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
4571
4572 if (locked_object) {
4573 vm_object_unlock(locked_object);
4574 locked_object = VM_OBJECT_NULL;
4575 }
4576 tmp_start_idx = cur_idx;
4577 abort_run = TRUE;
4578 continue;
4579 }
4580
4581 disconnected = FALSE;
4582 reusable = FALSE;
4583
4584 if ((m1->reusable ||
4585 m1->object->all_reusable) &&
4586 m1->inactive &&
4587 !m1->dirty &&
4588 !m1->reference) {
4589 /* reusable page... */
4590 refmod = pmap_disconnect(m1->phys_page);
4591 disconnected = TRUE;
4592 if (refmod == 0) {
4593 /*
4594 * ... not reused: can steal
4595 * without relocating contents.
4596 */
4597 reusable = TRUE;
4598 }
4599 }
4600
4601 if ((m1->pmapped &&
4602 ! reusable) ||
4603 m1->dirty ||
4604 m1->precious) {
4605 vm_object_offset_t offset;
4606
4607 m2 = vm_page_grab();
4608
4609 if (m2 == VM_PAGE_NULL) {
4610 if (locked_object) {
4611 vm_object_unlock(locked_object);
4612 locked_object = VM_OBJECT_NULL;
4613 }
4614 tmp_start_idx = cur_idx;
4615 abort_run = TRUE;
4616 continue;
4617 }
4618 if (! disconnected) {
4619 if (m1->pmapped)
4620 refmod = pmap_disconnect(m1->phys_page);
4621 else
4622 refmod = 0;
4623 }
4624
4625 /* copy the page's contents */
4626 pmap_copy_page(m1->phys_page, m2->phys_page);
4627 /* copy the page's state */
4628 assert(!VM_PAGE_WIRED(m1));
4629 assert(!m1->free);
4630 assert(!m1->pageout_queue);
4631 assert(!m1->laundry);
4632 m2->reference = m1->reference;
4633 assert(!m1->gobbled);
4634 assert(!m1->private);
4635 m2->no_cache = m1->no_cache;
4636 m2->xpmapped = 0;
4637 assert(!m1->busy);
4638 assert(!m1->wanted);
4639 assert(!m1->fictitious);
4640 m2->pmapped = m1->pmapped; /* should flush cache ? */
4641 m2->wpmapped = m1->wpmapped;
4642 assert(!m1->pageout);
4643 m2->absent = m1->absent;
4644 m2->error = m1->error;
4645 m2->dirty = m1->dirty;
4646 assert(!m1->cleaning);
4647 m2->precious = m1->precious;
4648 m2->clustered = m1->clustered;
4649 assert(!m1->overwriting);
4650 m2->restart = m1->restart;
4651 m2->unusual = m1->unusual;
4652 m2->encrypted = m1->encrypted;
4653 assert(!m1->encrypted_cleaning);
4654 m2->cs_validated = m1->cs_validated;
4655 m2->cs_tainted = m1->cs_tainted;
4656 m2->cs_nx = m1->cs_nx;
4657
4658 /*
4659 * If m1 had really been reusable,
4660 * we would have just stolen it, so
4661 * let's not propagate its "reusable"
4662 * bit and assert that m2 is not
4663 * marked as "reusable".
4664 */
4665 // m2->reusable = m1->reusable;
4666 assert(!m2->reusable);
4667
4668 assert(!m1->lopage);
4669 m2->slid = m1->slid;
4670 m2->compressor = m1->compressor;
4671
4672 /*
4673 * page may need to be flushed if
4674 * it is marshalled into a UPL
4675 * that is going to be used by a device
4676 * that doesn't support coherency
4677 */
4678 m2->written_by_kernel = TRUE;
4679
4680 /*
4681 * make sure we clear the ref/mod state
4682 * from the pmap layer... else we risk
4683 * inheriting state from the last time
4684 * this page was used...
4685 */
4686 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4687
4688 if (refmod & VM_MEM_REFERENCED)
4689 m2->reference = TRUE;
4690 if (refmod & VM_MEM_MODIFIED) {
4691 SET_PAGE_DIRTY(m2, TRUE);
4692 }
4693 offset = m1->offset;
4694
4695 /*
4696 * completely cleans up the state
4697 * of the page so that it is ready
4698 * to be put onto the free list, or
4699 * for this purpose it looks like it
4700 * just came off of the free list
4701 */
4702 vm_page_free_prepare(m1);
4703
4704 /*
4705 * now put the substitute page
4706 * on the object
4707 */
4708 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
4709
4710 if (m2->compressor) {
4711 m2->pmapped = TRUE;
4712 m2->wpmapped = TRUE;
4713
4714 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4715 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4716
4717 compressed_pages++;
4718
4719 } else {
4720 if (m2->reference)
4721 vm_page_activate(m2);
4722 else
4723 vm_page_deactivate(m2);
4724 }
4725 PAGE_WAKEUP_DONE(m2);
4726
4727 } else {
4728 assert(!m1->compressor);
4729
4730 /*
4731 * completely cleans up the state
4732 * of the page so that it is ready
4733 * to be put onto the free list, or
4734 * for this purpose it looks like it
4735 * just came off of the free list
4736 */
4737 vm_page_free_prepare(m1);
4738 }
4739
4740 stolen_pages++;
4741
4742 }
4743 m1->pageq.next = (queue_entry_t) m;
4744 m1->pageq.prev = NULL;
4745 m = m1;
4746 }
4747 if (locked_object) {
4748 vm_object_unlock(locked_object);
4749 locked_object = VM_OBJECT_NULL;
4750 }
4751
4752 if (abort_run == TRUE) {
4753 if (m != VM_PAGE_NULL) {
4754 vm_page_free_list(m, FALSE);
4755 }
4756
4757 dumped_run++;
4758
4759 /*
4760 * want the index of the last
4761 * page in this run that was
4762 * successfully 'stolen', so back
4763 * it up 1 for the auto-decrement on use
4764 * and 1 more to bump back over this page
4765 */
4766 page_idx = tmp_start_idx + 2;
4767 if (page_idx >= vm_pages_count) {
4768 if (wrapped)
4769 goto done_scanning;
4770 page_idx = last_idx = 0;
4771 wrapped = TRUE;
4772 }
4773 abort_run = FALSE;
4774
4775 /*
4776 * We didn't find a contiguous range but we didn't
4777 * start from the very first page.
4778 * Start again from the very first page.
4779 */
4780 RESET_STATE_OF_RUN();
4781
4782 if( flags & KMA_LOMEM)
4783 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4784 else
4785 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4786
4787 last_idx = page_idx;
4788
4789 lck_mtx_lock(&vm_page_queue_free_lock);
4790 /*
4791 * reset our free page limit since we
4792 * dropped the lock protecting the vm_page_free_queue
4793 */
4794 free_available = vm_page_free_count - vm_page_free_reserved;
4795 goto retry;
4796 }
4797
4798 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4799
4800 if (wire == TRUE)
4801 m1->wire_count++;
4802 else
4803 m1->gobbled = TRUE;
4804 }
4805 if (wire == FALSE)
4806 vm_page_gobble_count += npages;
4807
4808 /*
4809 * gobbled pages are also counted as wired pages
4810 */
4811 vm_page_wire_count += npages;
4812
4813 assert(vm_page_verify_contiguous(m, npages));
4814 }
4815 done_scanning:
4816 PAGE_REPLACEMENT_ALLOWED(FALSE);
4817
4818 vm_page_unlock_queues();
4819
4820 #if DEBUG
4821 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4822
4823 tv_end_sec -= tv_start_sec;
4824 if (tv_end_usec < tv_start_usec) {
4825 tv_end_sec--;
4826 tv_end_usec += 1000000;
4827 }
4828 tv_end_usec -= tv_start_usec;
4829 if (tv_end_usec >= 1000000) {
4830 tv_end_sec++;
4831 tv_end_usec -= 1000000;
4832 }
4833 if (vm_page_find_contig_debug) {
4834 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4835 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4836 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4837 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
4838 }
4839
4840 #endif
4841 #if MACH_ASSERT
4842 vm_page_verify_free_lists();
4843 #endif
4844 if (m == NULL && zone_gc_called == FALSE) {
4845 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
4846 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4847 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
4848
4849 if (consider_buffer_cache_collect != NULL) {
4850 (void)(*consider_buffer_cache_collect)(1);
4851 }
4852
4853 consider_zone_gc(TRUE);
4854
4855 zone_gc_called = TRUE;
4856
4857 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
4858 goto full_scan_again;
4859 }
4860
4861 return m;
4862 }
4863
4864 /*
4865 * Allocate a list of contiguous, wired pages.
4866 */
4867 kern_return_t
4868 cpm_allocate(
4869 vm_size_t size,
4870 vm_page_t *list,
4871 ppnum_t max_pnum,
4872 ppnum_t pnum_mask,
4873 boolean_t wire,
4874 int flags)
4875 {
4876 vm_page_t pages;
4877 unsigned int npages;
4878
4879 if (size % PAGE_SIZE != 0)
4880 return KERN_INVALID_ARGUMENT;
4881
4882 npages = (unsigned int) (size / PAGE_SIZE);
4883 if (npages != size / PAGE_SIZE) {
4884 /* 32-bit overflow */
4885 return KERN_INVALID_ARGUMENT;
4886 }
4887
4888 /*
4889 * Obtain a pointer to a subset of the free
4890 * list large enough to satisfy the request;
4891 * the region will be physically contiguous.
4892 */
4893 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4894
4895 if (pages == VM_PAGE_NULL)
4896 return KERN_NO_SPACE;
4897 /*
4898 * determine need for wakeups
4899 */
4900 if ((vm_page_free_count < vm_page_free_min) ||
4901 ((vm_page_free_count < vm_page_free_target) &&
4902 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4903 thread_wakeup((event_t) &vm_page_free_wanted);
4904
4905 VM_CHECK_MEMORYSTATUS;
4906
4907 /*
4908 * The CPM pages should now be available and
4909 * ordered by ascending physical address.
4910 */
4911 assert(vm_page_verify_contiguous(pages, npages));
4912
4913 *list = pages;
4914 return KERN_SUCCESS;
4915 }
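/*
 * Illustrative caller sketch (not part of the original source; guarded out):
 * request 16 wired, physically contiguous pages that all lie below 4GB and
 * start on a 64KB physical boundary, then walk the returned list.
 * cpm_request_example() is a hypothetical name.
 */
#if 0
static kern_return_t
cpm_request_example(void)
{
	vm_page_t	pages, m;
	kern_return_t	kr;

	kr = cpm_allocate(16 * PAGE_SIZE,			/* size, must be page aligned */
			  &pages,				/* head of the returned page list */
			  (ppnum_t)(0xFFFFFFFFULL >> PAGE_SHIFT), /* max_pnum: stay below 4GB */
			  (0x10000 >> PAGE_SHIFT) - 1,		/* pnum_mask: 64KB alignment */
			  TRUE,					/* wire the pages */
			  0);					/* flags, e.g. KMA_LOMEM */
	if (kr != KERN_SUCCESS)
		return kr;					/* KERN_NO_SPACE if no run was found */

	for (m = pages; m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
		/* pages are ordered by ascending phys_page */
	}
	return KERN_SUCCESS;
}
#endif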
4916
4917
4918 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4919
4920 /*
4921 * when working on a 'run' of pages, it is necessary to hold
4922 * the vm_page_queue_lock (a hot global lock) for certain operations
4923 * on the page... however, the majority of the work can be done
4924 * while merely holding the object lock... in fact there are certain
4925 * collections of pages that don't require any work brokered by the
4926 * vm_page_queue_lock... to mitigate the time spent behind the global
4927 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4928 * while doing all of the work that doesn't require the vm_page_queue_lock...
4929 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4930 * necessary work for each page... we will grab the busy bit on the page
4931 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4932 * if it can't immediately take the vm_page_queue_lock in order to compete
4933 * for the locks in the same order that vm_pageout_scan takes them.
4934 * The operation names are modeled after the names of the routines that
4935 * need to be called, in order to make the changes very obvious in the
4936 * original loop.
4937 */
4938
4939 void
4940 vm_page_do_delayed_work(
4941 vm_object_t object,
4942 vm_tag_t tag,
4943 struct vm_page_delayed_work *dwp,
4944 int dw_count)
4945 {
4946 int j;
4947 vm_page_t m;
4948 vm_page_t local_free_q = VM_PAGE_NULL;
4949
4950 /*
4951 * pageout_scan takes the vm_page_lock_queues first
4952 * then tries for the object lock... to avoid what
4953 * is effectively a lock inversion, we'll go to the
4954 * trouble of taking them in that same order... otherwise
4955 * if this object contains the majority of the pages resident
4956 * in the UBC (or a small set of large objects actively being
4957 * worked on contain the majority of the pages), we could
4958 * cause the pageout_scan thread to 'starve' in its attempt
4959 * to find pages to move to the free queue, since it has to
4960 * successfully acquire the object lock of any candidate page
4961 * before it can steal/clean it.
4962 */
4963 if (!vm_page_trylockspin_queues()) {
4964 vm_object_unlock(object);
4965
4966 vm_page_lockspin_queues();
4967
4968 for (j = 0; ; j++) {
4969 if (!vm_object_lock_avoid(object) &&
4970 _vm_object_lock_try(object))
4971 break;
4972 vm_page_unlock_queues();
4973 mutex_pause(j);
4974 vm_page_lockspin_queues();
4975 }
4976 }
4977 for (j = 0; j < dw_count; j++, dwp++) {
4978
4979 m = dwp->dw_m;
4980
4981 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4982 vm_pageout_throttle_up(m);
4983 #if CONFIG_PHANTOM_CACHE
4984 if (dwp->dw_mask & DW_vm_phantom_cache_update)
4985 vm_phantom_cache_update(m);
4986 #endif
4987 if (dwp->dw_mask & DW_vm_page_wire)
4988 vm_page_wire(m, tag, FALSE);
4989 else if (dwp->dw_mask & DW_vm_page_unwire) {
4990 boolean_t queueit;
4991
4992 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
4993
4994 vm_page_unwire(m, queueit);
4995 }
4996 if (dwp->dw_mask & DW_vm_page_free) {
4997 vm_page_free_prepare_queues(m);
4998
4999 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
5000 /*
5001 * Add this page to our list of reclaimed pages,
5002 * to be freed later.
5003 */
5004 m->pageq.next = (queue_entry_t) local_free_q;
5005 local_free_q = m;
5006 } else {
5007 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
5008 vm_page_deactivate_internal(m, FALSE);
5009 else if (dwp->dw_mask & DW_vm_page_activate) {
5010 if (m->active == FALSE) {
5011 vm_page_activate(m);
5012 }
5013 }
5014 else if (dwp->dw_mask & DW_vm_page_speculate)
5015 vm_page_speculate(m, TRUE);
5016 else if (dwp->dw_mask & DW_enqueue_cleaned) {
5017 /*
5018 * if we didn't hold the object lock and did this,
5019 * we might disconnect the page, then someone might
5020 * soft fault it back in, then we would put it on the
5021 * cleaned queue, and so we would have a referenced (maybe even dirty)
5022 * page on that queue, which we don't want
5023 */
5024 int refmod_state = pmap_disconnect(m->phys_page);
5025
5026 if ((refmod_state & VM_MEM_REFERENCED)) {
5027 /*
5028 * this page has been touched since it got cleaned; let's activate it
5029 * if it hasn't already been
5030 */
5031 vm_pageout_enqueued_cleaned++;
5032 vm_pageout_cleaned_reactivated++;
5033 vm_pageout_cleaned_commit_reactivated++;
5034
5035 if (m->active == FALSE)
5036 vm_page_activate(m);
5037 } else {
5038 m->reference = FALSE;
5039 vm_page_enqueue_cleaned(m);
5040 }
5041 }
5042 else if (dwp->dw_mask & DW_vm_page_lru)
5043 vm_page_lru(m);
5044 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
5045 if ( !m->pageout_queue)
5046 vm_page_queues_remove(m);
5047 }
5048 if (dwp->dw_mask & DW_set_reference)
5049 m->reference = TRUE;
5050 else if (dwp->dw_mask & DW_clear_reference)
5051 m->reference = FALSE;
5052
5053 if (dwp->dw_mask & DW_move_page) {
5054 if ( !m->pageout_queue) {
5055 vm_page_queues_remove(m);
5056
5057 assert(m->object != kernel_object);
5058
5059 vm_page_enqueue_inactive(m, FALSE);
5060 }
5061 }
5062 if (dwp->dw_mask & DW_clear_busy)
5063 m->busy = FALSE;
5064
5065 if (dwp->dw_mask & DW_PAGE_WAKEUP)
5066 PAGE_WAKEUP(m);
5067 }
5068 }
5069 vm_page_unlock_queues();
5070
5071 if (local_free_q)
5072 vm_page_free_list(local_free_q, TRUE);
5073
5074 VM_CHECK_MEMORYSTATUS;
5075
5076 }
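/*
 * Illustrative sketch (not part of the original source; guarded out):
 * the 2 pass pattern described above. Work that needs only the object lock
 * is batched into a local vm_page_delayed_work array; the page queue lock
 * is then taken once, inside vm_page_do_delayed_work(), for the whole batch.
 * delayed_work_example() and its page selection are hypothetical; the
 * dw_m/dw_mask fields and DW_* masks are the ones handled above.
 */
#if 0
static void
delayed_work_example(vm_object_t object)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	vm_page_t			m;

	vm_object_lock(object);

	queue_iterate(&object->memq, m, vm_page_t, listq) {
		/*
		 * pass 1: object-lock-only work goes here...
		 * grab the busy bit if we don't already hold it, so that
		 * vm_page_do_delayed_work may safely drop the object lock
		 */
		dwp->dw_m = m;
		if (!m->busy) {
			m->busy = TRUE;
			dwp->dw_mask = DW_vm_page_activate | DW_clear_busy | DW_PAGE_WAKEUP;
		} else {
			dwp->dw_mask = DW_vm_page_activate;
		}
		dwp++;
		if (++dw_count >= DEFAULT_DELAYED_WORK_LIMIT)
			break;
	}
	if (dw_count) {
		/* pass 2: queue-lock work, done in one batch */
		vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
	}
	vm_object_unlock(object);
}
#endif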
5077
5078 kern_return_t
5079 vm_page_alloc_list(
5080 int page_count,
5081 int flags,
5082 vm_page_t *list)
5083 {
5084 vm_page_t lo_page_list = VM_PAGE_NULL;
5085 vm_page_t mem;
5086 int i;
5087
5088 if ( !(flags & KMA_LOMEM))
5089 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
5090
5091 for (i = 0; i < page_count; i++) {
5092
5093 mem = vm_page_grablo();
5094
5095 if (mem == VM_PAGE_NULL) {
5096 if (lo_page_list)
5097 vm_page_free_list(lo_page_list, FALSE);
5098
5099 *list = VM_PAGE_NULL;
5100
5101 return (KERN_RESOURCE_SHORTAGE);
5102 }
5103 mem->pageq.next = (queue_entry_t) lo_page_list;
5104 lo_page_list = mem;
5105 }
5106 *list = lo_page_list;
5107
5108 return (KERN_SUCCESS);
5109 }
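/*
 * Illustrative caller sketch (not part of the original source; guarded out):
 * grab a small batch of low-memory pages, inspect their physical page
 * numbers via the accessors defined just below, and hand the batch back.
 * lomem_batch_example() is a hypothetical name.
 */
#if 0
static kern_return_t
lomem_batch_example(void)
{
	vm_page_t	list, m;
	kern_return_t	kr;

	kr = vm_page_alloc_list(8, KMA_LOMEM, &list);	/* KMA_LOMEM is required */
	if (kr != KERN_SUCCESS)
		return kr;				/* KERN_RESOURCE_SHORTAGE */

	for (m = list; m != VM_PAGE_NULL; m = vm_page_get_next(m)) {
		(void) vm_page_get_phys_page(m);	/* e.g. program a DMA descriptor */
	}
	vm_page_free_list(list, FALSE);
	return KERN_SUCCESS;
}
#endif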
5110
5111 void
5112 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
5113 {
5114 page->offset = offset;
5115 }
5116
5117 vm_page_t
5118 vm_page_get_next(vm_page_t page)
5119 {
5120 return ((vm_page_t) page->pageq.next);
5121 }
5122
5123 vm_object_offset_t
5124 vm_page_get_offset(vm_page_t page)
5125 {
5126 return (page->offset);
5127 }
5128
5129 ppnum_t
5130 vm_page_get_phys_page(vm_page_t page)
5131 {
5132 return (page->phys_page);
5133 }
5134
5135
5136 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5137
5138 #if HIBERNATION
5139
5140 static vm_page_t hibernate_gobble_queue;
5141
5142 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
5143 static int hibernate_flush_dirty_pages(int);
5144 static int hibernate_flush_queue(queue_head_t *, int);
5145
5146 void hibernate_flush_wait(void);
5147 void hibernate_mark_in_progress(void);
5148 void hibernate_clear_in_progress(void);
5149
5150 void hibernate_free_range(int, int);
5151 void hibernate_hash_insert_page(vm_page_t);
5152 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
5153 void hibernate_rebuild_vm_structs(void);
5154 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
5155 ppnum_t hibernate_lookup_paddr(unsigned int);
5156
5157 struct hibernate_statistics {
5158 int hibernate_considered;
5159 int hibernate_reentered_on_q;
5160 int hibernate_found_dirty;
5161 int hibernate_skipped_cleaning;
5162 int hibernate_skipped_transient;
5163 int hibernate_skipped_precious;
5164 int hibernate_skipped_external;
5165 int hibernate_queue_nolock;
5166 int hibernate_queue_paused;
5167 int hibernate_throttled;
5168 int hibernate_throttle_timeout;
5169 int hibernate_drained;
5170 int hibernate_drain_timeout;
5171 int cd_lock_failed;
5172 int cd_found_precious;
5173 int cd_found_wired;
5174 int cd_found_busy;
5175 int cd_found_unusual;
5176 int cd_found_cleaning;
5177 int cd_found_laundry;
5178 int cd_found_dirty;
5179 int cd_found_xpmapped;
5180 int cd_skipped_xpmapped;
5181 int cd_local_free;
5182 int cd_total_free;
5183 int cd_vm_page_wire_count;
5184 int cd_vm_struct_pages_unneeded;
5185 int cd_pages;
5186 int cd_discarded;
5187 int cd_count_wire;
5188 } hibernate_stats;
5189
5190
5191 /*
5192 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
5193 * so that we don't overrun the estimated image size, which would
5194 * result in a hibernation failure.
5195 */
5196 #define HIBERNATE_XPMAPPED_LIMIT 40000
5197
5198
5199 static int
5200 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
5201 {
5202 wait_result_t wait_result;
5203
5204 vm_page_lock_queues();
5205
5206 while ( !queue_empty(&q->pgo_pending) ) {
5207
5208 q->pgo_draining = TRUE;
5209
5210 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
5211
5212 vm_page_unlock_queues();
5213
5214 wait_result = thread_block(THREAD_CONTINUE_NULL);
5215
5216 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
5217 hibernate_stats.hibernate_drain_timeout++;
5218
5219 if (q == &vm_pageout_queue_external)
5220 return (0);
5221
5222 return (1);
5223 }
5224 vm_page_lock_queues();
5225
5226 hibernate_stats.hibernate_drained++;
5227 }
5228 vm_page_unlock_queues();
5229
5230 return (0);
5231 }
5232
5233
5234 boolean_t hibernate_skip_external = FALSE;
5235
5236 static int
5237 hibernate_flush_queue(queue_head_t *q, int qcount)
5238 {
5239 vm_page_t m;
5240 vm_object_t l_object = NULL;
5241 vm_object_t m_object = NULL;
5242 int refmod_state = 0;
5243 int try_failed_count = 0;
5244 int retval = 0;
5245 int current_run = 0;
5246 struct vm_pageout_queue *iq;
5247 struct vm_pageout_queue *eq;
5248 struct vm_pageout_queue *tq;
5249
5250
5251 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
5252
5253 iq = &vm_pageout_queue_internal;
5254 eq = &vm_pageout_queue_external;
5255
5256 vm_page_lock_queues();
5257
5258 while (qcount && !queue_empty(q)) {
5259
5260 if (current_run++ == 1000) {
5261 if (hibernate_should_abort()) {
5262 retval = 1;
5263 break;
5264 }
5265 current_run = 0;
5266 }
5267
5268 m = (vm_page_t) queue_first(q);
5269 m_object = m->object;
5270
5271 /*
5272 * check to see if we currently are working
5273 * with the same object... if so, we've
5274 * already got the lock
5275 */
5276 if (m_object != l_object) {
5277 /*
5278 * the object associated with candidate page is
5279 * different from the one we were just working
5280 * with... dump the lock if we still own it
5281 */
5282 if (l_object != NULL) {
5283 vm_object_unlock(l_object);
5284 l_object = NULL;
5285 }
5286 /*
5287 * Try to lock object; since we've already got the
5288 * page queues lock, we can only 'try' for this one.
5289 * if the 'try' fails, we need to do a mutex_pause
5290 * to allow the owner of the object lock a chance to
5291 * run...
5292 */
5293 if ( !vm_object_lock_try_scan(m_object)) {
5294
5295 if (try_failed_count > 20) {
5296 hibernate_stats.hibernate_queue_nolock++;
5297
5298 goto reenter_pg_on_q;
5299 }
5300
5301 vm_page_unlock_queues();
5302 mutex_pause(try_failed_count++);
5303 vm_page_lock_queues();
5304
5305 hibernate_stats.hibernate_queue_paused++;
5306 continue;
5307 } else {
5308 l_object = m_object;
5309 }
5310 }
5311 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
5312 /*
5313 * page is not to be cleaned
5314 * put it back on the head of its queue
5315 */
5316 if (m->cleaning)
5317 hibernate_stats.hibernate_skipped_cleaning++;
5318 else
5319 hibernate_stats.hibernate_skipped_transient++;
5320
5321 goto reenter_pg_on_q;
5322 }
5323 if (m_object->copy == VM_OBJECT_NULL) {
5324 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
5325 /*
5326 * let the normal hibernate image path
5327 * deal with these
5328 */
5329 goto reenter_pg_on_q;
5330 }
5331 }
5332 if ( !m->dirty && m->pmapped) {
5333 refmod_state = pmap_get_refmod(m->phys_page);
5334
5335 if ((refmod_state & VM_MEM_MODIFIED)) {
5336 SET_PAGE_DIRTY(m, FALSE);
5337 }
5338 } else
5339 refmod_state = 0;
5340
5341 if ( !m->dirty) {
5342 /*
5343 * page is not to be cleaned
5344 * put it back on the head of its queue
5345 */
5346 if (m->precious)
5347 hibernate_stats.hibernate_skipped_precious++;
5348
5349 goto reenter_pg_on_q;
5350 }
5351
5352 if (hibernate_skip_external == TRUE && !m_object->internal) {
5353
5354 hibernate_stats.hibernate_skipped_external++;
5355
5356 goto reenter_pg_on_q;
5357 }
5358 tq = NULL;
5359
5360 if (m_object->internal) {
5361 if (VM_PAGE_Q_THROTTLED(iq))
5362 tq = iq;
5363 } else if (VM_PAGE_Q_THROTTLED(eq))
5364 tq = eq;
5365
5366 if (tq != NULL) {
5367 wait_result_t wait_result;
5368 int wait_count = 5;
5369
5370 if (l_object != NULL) {
5371 vm_object_unlock(l_object);
5372 l_object = NULL;
5373 }
5374
5375 while (retval == 0) {
5376
5377 tq->pgo_throttled = TRUE;
5378
5379 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
5380
5381 vm_page_unlock_queues();
5382
5383 wait_result = thread_block(THREAD_CONTINUE_NULL);
5384
5385 vm_page_lock_queues();
5386
5387 if (wait_result != THREAD_TIMED_OUT)
5388 break;
5389 if (!VM_PAGE_Q_THROTTLED(tq))
5390 break;
5391
5392 if (hibernate_should_abort())
5393 retval = 1;
5394
5395 if (--wait_count == 0) {
5396
5397 hibernate_stats.hibernate_throttle_timeout++;
5398
5399 if (tq == eq) {
5400 hibernate_skip_external = TRUE;
5401 break;
5402 }
5403 retval = 1;
5404 }
5405 }
5406 if (retval)
5407 break;
5408
5409 hibernate_stats.hibernate_throttled++;
5410
5411 continue;
5412 }
5413 /*
5414 * we've already factored out pages in the laundry which
5415 * means this page can't be on the pageout queue so it's
5416 * safe to do the vm_page_queues_remove
5417 */
5418 assert(!m->pageout_queue);
5419
5420 vm_page_queues_remove(m);
5421
5422 if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
5423 pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
5424
5425 (void)vm_pageout_cluster(m, FALSE, FALSE, FALSE);
5426
5427 hibernate_stats.hibernate_found_dirty++;
5428
5429 goto next_pg;
5430
5431 reenter_pg_on_q:
5432 queue_remove(q, m, vm_page_t, pageq);
5433 queue_enter(q, m, vm_page_t, pageq);
5434
5435 hibernate_stats.hibernate_reentered_on_q++;
5436 next_pg:
5437 hibernate_stats.hibernate_considered++;
5438
5439 qcount--;
5440 try_failed_count = 0;
5441 }
5442 if (l_object != NULL) {
5443 vm_object_unlock(l_object);
5444 l_object = NULL;
5445 }
5446
5447 vm_page_unlock_queues();
5448
5449 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5450
5451 return (retval);
5452 }
5453
5454
5455 static int
5456 hibernate_flush_dirty_pages(int pass)
5457 {
5458 struct vm_speculative_age_q *aq;
5459 uint32_t i;
5460
5461 if (vm_page_local_q) {
5462 for (i = 0; i < vm_page_local_q_count; i++)
5463 vm_page_reactivate_local(i, TRUE, FALSE);
5464 }
5465
5466 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5467 int qcount;
5468 vm_page_t m;
5469
5470 aq = &vm_page_queue_speculative[i];
5471
5472 if (queue_empty(&aq->age_q))
5473 continue;
5474 qcount = 0;
5475
5476 vm_page_lockspin_queues();
5477
5478 queue_iterate(&aq->age_q,
5479 m,
5480 vm_page_t,
5481 pageq)
5482 {
5483 qcount++;
5484 }
5485 vm_page_unlock_queues();
5486
5487 if (qcount) {
5488 if (hibernate_flush_queue(&aq->age_q, qcount))
5489 return (1);
5490 }
5491 }
5492 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
5493 return (1);
5494 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5495 return (1);
5496 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
5497 return (1);
5498 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5499 return (1);
5500
5501 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5502 vm_compressor_record_warmup_start();
5503
5504 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5505 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5506 vm_compressor_record_warmup_end();
5507 return (1);
5508 }
5509 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5510 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5511 vm_compressor_record_warmup_end();
5512 return (1);
5513 }
5514 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5515 vm_compressor_record_warmup_end();
5516
5517 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5518 return (1);
5519
5520 return (0);
5521 }
5522
5523
5524 void
5525 hibernate_reset_stats()
5526 {
5527 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5528 }
5529
5530
5531 int
5532 hibernate_flush_memory()
5533 {
5534 int retval;
5535
5536 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5537
5538 hibernate_cleaning_in_progress = TRUE;
5539 hibernate_skip_external = FALSE;
5540
5541 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5542
5543 if (COMPRESSED_PAGER_IS_ACTIVE) {
5544
5545 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5546
5547 vm_compressor_flush();
5548
5549 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5550 }
5551 if (consider_buffer_cache_collect != NULL) {
5552 unsigned int orig_wire_count;
5553
5554 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5555 orig_wire_count = vm_page_wire_count;
5556
5557 (void)(*consider_buffer_cache_collect)(1);
5558 consider_zone_gc(TRUE);
5559
5560 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5561
5562 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
5563 }
5564 }
5565 hibernate_cleaning_in_progress = FALSE;
5566
5567 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5568
5569 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5570 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5571
5572
5573 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5574 hibernate_stats.hibernate_considered,
5575 hibernate_stats.hibernate_reentered_on_q,
5576 hibernate_stats.hibernate_found_dirty);
5577 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
5578 hibernate_stats.hibernate_skipped_cleaning,
5579 hibernate_stats.hibernate_skipped_transient,
5580 hibernate_stats.hibernate_skipped_precious,
5581 hibernate_stats.hibernate_skipped_external,
5582 hibernate_stats.hibernate_queue_nolock);
5583 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5584 hibernate_stats.hibernate_queue_paused,
5585 hibernate_stats.hibernate_throttled,
5586 hibernate_stats.hibernate_throttle_timeout,
5587 hibernate_stats.hibernate_drained,
5588 hibernate_stats.hibernate_drain_timeout);
5589
5590 return (retval);
5591 }
5592
5593
5594 static void
5595 hibernate_page_list_zero(hibernate_page_list_t *list)
5596 {
5597 uint32_t bank;
5598 hibernate_bitmap_t * bitmap;
5599
5600 bitmap = &list->bank_bitmap[0];
5601 for (bank = 0; bank < list->bank_count; bank++)
5602 {
5603 uint32_t last_bit;
5604
5605 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5606 // set out-of-bound bits at end of bitmap.
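// e.g. a bank spanning 70 pages has 70 & 31 = 6 in-range pages in its final
// 32-bit word; the remaining 26 bit positions are pre-set here so those
// nonexistent pages are never treated as needing to be saved.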
5607 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5608 if (last_bit)
5609 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
5610
5611 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5612 }
5613 }
5614
5615 void
5616 hibernate_free_gobble_pages(void)
5617 {
5618 vm_page_t m, next;
5619 uint32_t count = 0;
5620
5621 m = (vm_page_t) hibernate_gobble_queue;
5622 while(m)
5623 {
5624 next = (vm_page_t) m->pageq.next;
5625 vm_page_free(m);
5626 count++;
5627 m = next;
5628 }
5629 hibernate_gobble_queue = VM_PAGE_NULL;
5630
5631 if (count)
5632 HIBLOG("Freed %d pages\n", count);
5633 }
5634
5635 static boolean_t
5636 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
5637 {
5638 vm_object_t object = NULL;
5639 int refmod_state;
5640 boolean_t discard = FALSE;
5641
5642 do
5643 {
5644 if (m->private)
5645 panic("hibernate_consider_discard: private");
5646
5647 if (!vm_object_lock_try(m->object)) {
5648 if (!preflight) hibernate_stats.cd_lock_failed++;
5649 break;
5650 }
5651 object = m->object;
5652
5653 if (VM_PAGE_WIRED(m)) {
5654 if (!preflight) hibernate_stats.cd_found_wired++;
5655 break;
5656 }
5657 if (m->precious) {
5658 if (!preflight) hibernate_stats.cd_found_precious++;
5659 break;
5660 }
5661 if (m->busy || !object->alive) {
5662 /*
5663 * Somebody is playing with this page.
5664 */
5665 if (!preflight) hibernate_stats.cd_found_busy++;
5666 break;
5667 }
5668 if (m->absent || m->unusual || m->error) {
5669 /*
5670 * If it's unusual in any way, ignore it
5671 */
5672 if (!preflight) hibernate_stats.cd_found_unusual++;
5673 break;
5674 }
5675 if (m->cleaning) {
5676 if (!preflight) hibernate_stats.cd_found_cleaning++;
5677 break;
5678 }
5679 if (m->laundry) {
5680 if (!preflight) hibernate_stats.cd_found_laundry++;
5681 break;
5682 }
5683 if (!m->dirty)
5684 {
5685 refmod_state = pmap_get_refmod(m->phys_page);
5686
5687 if (refmod_state & VM_MEM_REFERENCED)
5688 m->reference = TRUE;
5689 if (refmod_state & VM_MEM_MODIFIED) {
5690 SET_PAGE_DIRTY(m, FALSE);
5691 }
5692 }
5693
5694 /*
5695 * If it's clean or purgeable we can discard the page on wakeup.
5696 */
5697 discard = (!m->dirty)
5698 || (VM_PURGABLE_VOLATILE == object->purgable)
5699 || (VM_PURGABLE_EMPTY == object->purgable);
5700
5701
5702 if (discard == FALSE) {
5703 if (!preflight)
5704 hibernate_stats.cd_found_dirty++;
5705 } else if (m->xpmapped && m->reference && !object->internal) {
5706 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
5707 if (!preflight)
5708 hibernate_stats.cd_found_xpmapped++;
5709 discard = FALSE;
5710 } else {
5711 if (!preflight)
5712 hibernate_stats.cd_skipped_xpmapped++;
5713 }
5714 }
5715 }
5716 while (FALSE);
5717
5718 if (object)
5719 vm_object_unlock(object);
5720
5721 return (discard);
5722 }
5723
5724
5725 static void
5726 hibernate_discard_page(vm_page_t m)
5727 {
5728 if (m->absent || m->unusual || m->error)
5729 /*
5730 * If it's unusual in any way, ignore
5731 */
5732 return;
5733
5734 #if MACH_ASSERT || DEBUG
5735 vm_object_t object = m->object;
5736 if (!vm_object_lock_try(m->object))
5737 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5738 #else
5739 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5740 makes sure these locks are uncontended before sleep */
5741 #endif /* MACH_ASSERT || DEBUG */
5742
5743 if (m->pmapped == TRUE)
5744 {
5745 __unused int refmod_state = pmap_disconnect(m->phys_page);
5746 }
5747
5748 if (m->laundry)
5749 panic("hibernate_discard_page(%p) laundry", m);
5750 if (m->private)
5751 panic("hibernate_discard_page(%p) private", m);
5752 if (m->fictitious)
5753 panic("hibernate_discard_page(%p) fictitious", m);
5754
5755 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5756 {
5757 /* object should be on a queue */
5758 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5759 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5760 assert(old_queue);
5761 if (m->object->purgeable_when_ripe) {
5762 vm_purgeable_token_delete_first(old_queue);
5763 }
5764 m->object->purgable = VM_PURGABLE_EMPTY;
5765
5766 /*
5767 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
5768 * accounted in the "volatile" ledger, so no change here.
5769 * We have to update vm_page_purgeable_count, though, since we're
5770 * effectively purging this object.
5771 */
5772 unsigned int delta;
5773 assert(m->object->resident_page_count >= m->object->wired_page_count);
5774 delta = (m->object->resident_page_count - m->object->wired_page_count);
5775 assert(vm_page_purgeable_count >= delta);
5776 assert(delta > 0);
5777 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
5778 }
5779
5780 vm_page_free(m);
5781
5782 #if MACH_ASSERT || DEBUG
5783 vm_object_unlock(object);
5784 #endif /* MACH_ASSERT || DEBUG */
5785 }
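/*
 * Illustrative sketch, not xnu code: the purgeable-count adjustment made
 * above, using C11 atomics in place of OSAddAtomic().  When a volatile
 * object is effectively purged, its unwired resident pages no longer count
 * as purgeable.  "fake_purgeable_count" is a hypothetical stand-in for
 * vm_page_purgeable_count.
 */
#include <assert.h>
#include <stdatomic.h>

static _Atomic unsigned int fake_purgeable_count;

static void
fake_purge_accounting(unsigned int resident_pages, unsigned int wired_pages)
{
        unsigned int delta;

        assert(resident_pages >= wired_pages);
        delta = resident_pages - wired_pages;   /* pages leaving the purgeable pool */
        assert(atomic_load(&fake_purgeable_count) >= delta);
        atomic_fetch_sub(&fake_purgeable_count, delta);
}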
5786
5787 /*
5788 Grab locks for hibernate_page_list_setall()
5789 */
5790 void
5791 hibernate_vm_lock_queues(void)
5792 {
5793 vm_object_lock(compressor_object);
5794 vm_page_lock_queues();
5795 lck_mtx_lock(&vm_page_queue_free_lock);
5796
5797 if (vm_page_local_q) {
5798 uint32_t i;
5799 for (i = 0; i < vm_page_local_q_count; i++) {
5800 struct vpl *lq;
5801 lq = &vm_page_local_q[i].vpl_un.vpl;
5802 VPL_LOCK(&lq->vpl_lock);
5803 }
5804 }
5805 }
5806
5807 void
5808 hibernate_vm_unlock_queues(void)
5809 {
5810 if (vm_page_local_q) {
5811 uint32_t i;
5812 for (i = 0; i < vm_page_local_q_count; i++) {
5813 struct vpl *lq;
5814 lq = &vm_page_local_q[i].vpl_un.vpl;
5815 VPL_UNLOCK(&lq->vpl_lock);
5816 }
5817 }
5818 lck_mtx_unlock(&vm_page_queue_free_lock);
5819 vm_page_unlock_queues();
5820 vm_object_unlock(compressor_object);
5821 }
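/*
 * Illustrative sketch, not xnu code: the discipline used by the two helpers
 * above -- acquire the lock classes in one fixed order (object, page queues,
 * free list, per-CPU locals) and release them in the reverse class order.
 * pthread mutexes stand in for the kernel locks; the array size is arbitrary.
 */
#include <pthread.h>

#define FAKE_NCPUS 4

static pthread_mutex_t fake_object_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t fake_queue_lock  = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t fake_free_lock   = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t fake_local_lock[FAKE_NCPUS] = {
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER,
        PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
};

static void
fake_lock_all(void)
{
        pthread_mutex_lock(&fake_object_lock);
        pthread_mutex_lock(&fake_queue_lock);
        pthread_mutex_lock(&fake_free_lock);
        for (int i = 0; i < FAKE_NCPUS; i++)
                pthread_mutex_lock(&fake_local_lock[i]);
}

static void
fake_unlock_all(void)
{
        for (int i = 0; i < FAKE_NCPUS; i++)
                pthread_mutex_unlock(&fake_local_lock[i]);
        pthread_mutex_unlock(&fake_free_lock);
        pthread_mutex_unlock(&fake_queue_lock);
        pthread_mutex_unlock(&fake_object_lock);
}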
5822
5823 /*
5824 Bits zero in the bitmaps => page needs to be saved. All pages default to being saved;
5825 pages known to VM not to need saving are subtracted.
5826 Wired pages to be saved are present in page_list_wired, pageable ones in page_list.
5827 */
5828
5829 void
5830 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5831 hibernate_page_list_t * page_list_wired,
5832 hibernate_page_list_t * page_list_pal,
5833 boolean_t preflight,
5834 boolean_t will_discard,
5835 uint32_t * pagesOut)
5836 {
5837 uint64_t start, end, nsec;
5838 vm_page_t m;
5839 vm_page_t next;
5840 uint32_t pages = page_list->page_count;
5841 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
5842 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5843 uint32_t count_wire = pages;
5844 uint32_t count_discard_active = 0;
5845 uint32_t count_discard_inactive = 0;
5846 uint32_t count_discard_cleaned = 0;
5847 uint32_t count_discard_purgeable = 0;
5848 uint32_t count_discard_speculative = 0;
5849 uint32_t count_discard_vm_struct_pages = 0;
5850 uint32_t i;
5851 uint32_t bank;
5852 hibernate_bitmap_t * bitmap;
5853 hibernate_bitmap_t * bitmap_wired;
5854 boolean_t discard_all;
5855 boolean_t discard;
5856
5857 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
5858
5859 if (preflight) {
5860 page_list = NULL;
5861 page_list_wired = NULL;
5862 page_list_pal = NULL;
5863 discard_all = FALSE;
5864 } else {
5865 discard_all = will_discard;
5866 }
5867
5868 #if MACH_ASSERT || DEBUG
5869 if (!preflight)
5870 {
5871 vm_page_lock_queues();
5872 if (vm_page_local_q) {
5873 for (i = 0; i < vm_page_local_q_count; i++) {
5874 struct vpl *lq;
5875 lq = &vm_page_local_q[i].vpl_un.vpl;
5876 VPL_LOCK(&lq->vpl_lock);
5877 }
5878 }
5879 }
5880 #endif /* MACH_ASSERT || DEBUG */
5881
5882
5883 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5884
5885 clock_get_uptime(&start);
5886
5887 if (!preflight) {
5888 hibernate_page_list_zero(page_list);
5889 hibernate_page_list_zero(page_list_wired);
5890 hibernate_page_list_zero(page_list_pal);
5891
5892 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5893 hibernate_stats.cd_pages = pages;
5894 }
5895
5896 if (vm_page_local_q) {
5897 for (i = 0; i < vm_page_local_q_count; i++)
5898 vm_page_reactivate_local(i, TRUE, !preflight);
5899 }
5900
5901 if (preflight) {
5902 vm_object_lock(compressor_object);
5903 vm_page_lock_queues();
5904 lck_mtx_lock(&vm_page_queue_free_lock);
5905 }
5906
5907 m = (vm_page_t) hibernate_gobble_queue;
5908 while (m)
5909 {
5910 pages--;
5911 count_wire--;
5912 if (!preflight) {
5913 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5914 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5915 }
5916 m = (vm_page_t) m->pageq.next;
5917 }
5918
5919 if (!preflight) for( i = 0; i < real_ncpus; i++ )
5920 {
5921 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5922 {
5923 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5924 {
5925 pages--;
5926 count_wire--;
5927 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5928 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5929
5930 hibernate_stats.cd_local_free++;
5931 hibernate_stats.cd_total_free++;
5932 }
5933 }
5934 }
5935
5936 for( i = 0; i < vm_colors; i++ )
5937 {
5938 queue_iterate(&vm_page_queue_free[i],
5939 m,
5940 vm_page_t,
5941 pageq)
5942 {
5943 pages--;
5944 count_wire--;
5945 if (!preflight) {
5946 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5947 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5948
5949 hibernate_stats.cd_total_free++;
5950 }
5951 }
5952 }
5953
5954 queue_iterate(&vm_lopage_queue_free,
5955 m,
5956 vm_page_t,
5957 pageq)
5958 {
5959 pages--;
5960 count_wire--;
5961 if (!preflight) {
5962 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5963 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5964
5965 hibernate_stats.cd_total_free++;
5966 }
5967 }
5968
5969 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5970 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
5971 {
5972 next = (vm_page_t) m->pageq.next;
5973 discard = FALSE;
5974 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5975 && hibernate_consider_discard(m, preflight))
5976 {
5977 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5978 count_discard_inactive++;
5979 discard = discard_all;
5980 }
5981 else
5982 count_throttled++;
5983 count_wire--;
5984 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5985
5986 if (discard) hibernate_discard_page(m);
5987 m = next;
5988 }
5989
5990 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5991 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5992 {
5993 next = (vm_page_t) m->pageq.next;
5994 discard = FALSE;
5995 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5996 && hibernate_consider_discard(m, preflight))
5997 {
5998 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5999 if (m->dirty)
6000 count_discard_purgeable++;
6001 else
6002 count_discard_inactive++;
6003 discard = discard_all;
6004 }
6005 else
6006 count_anonymous++;
6007 count_wire--;
6008 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6009 if (discard) hibernate_discard_page(m);
6010 m = next;
6011 }
6012
6013 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
6014 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
6015 {
6016 next = (vm_page_t) m->pageq.next;
6017 discard = FALSE;
6018 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6019 && hibernate_consider_discard(m, preflight))
6020 {
6021 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
6022 if (m->dirty)
6023 count_discard_purgeable++;
6024 else
6025 count_discard_cleaned++;
6026 discard = discard_all;
6027 }
6028 else
6029 count_cleaned++;
6030 count_wire--;
6031 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6032 if (discard) hibernate_discard_page(m);
6033 m = next;
6034 }
6035
6036 m = (vm_page_t) queue_first(&vm_page_queue_active);
6037 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
6038 {
6039 next = (vm_page_t) m->pageq.next;
6040 discard = FALSE;
6041 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
6042 && hibernate_consider_discard(m, preflight))
6043 {
6044 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
6045 if (m->dirty)
6046 count_discard_purgeable++;
6047 else
6048 count_discard_active++;
6049 discard = discard_all;
6050 }
6051 else
6052 count_active++;
6053 count_wire--;
6054 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6055 if (discard) hibernate_discard_page(m);
6056 m = next;
6057 }
6058
6059 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6060 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
6061 {
6062 next = (vm_page_t) m->pageq.next;
6063 discard = FALSE;
6064 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6065 && hibernate_consider_discard(m, preflight))
6066 {
6067 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
6068 if (m->dirty)
6069 count_discard_purgeable++;
6070 else
6071 count_discard_inactive++;
6072 discard = discard_all;
6073 }
6074 else
6075 count_inactive++;
6076 count_wire--;
6077 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6078 if (discard) hibernate_discard_page(m);
6079 m = next;
6080 }
6081
6082 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6083 {
6084 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
6085 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
6086 {
6087 next = (vm_page_t) m->pageq.next;
6088 discard = FALSE;
6089 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6090 && hibernate_consider_discard(m, preflight))
6091 {
6092 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
6093 count_discard_speculative++;
6094 discard = discard_all;
6095 }
6096 else
6097 count_speculative++;
6098 count_wire--;
6099 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6100 if (discard) hibernate_discard_page(m);
6101 m = next;
6102 }
6103 }
6104
6105 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
6106 {
6107 count_compressor++;
6108 count_wire--;
6109 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6110 }
6111
6112 if (preflight == FALSE && discard_all == TRUE) {
6113 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6114
6115 HIBLOG("hibernate_teardown started\n");
6116 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
6117 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
6118
6119 pages -= count_discard_vm_struct_pages;
6120 count_wire -= count_discard_vm_struct_pages;
6121
6122 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
6123
6124 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6125 }
6126
6127 if (!preflight) {
6128 // pull wired from hibernate_bitmap
6129 bitmap = &page_list->bank_bitmap[0];
6130 bitmap_wired = &page_list_wired->bank_bitmap[0];
6131 for (bank = 0; bank < page_list->bank_count; bank++)
6132 {
6133 for (i = 0; i < bitmap->bitmapwords; i++)
6134 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
6135 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
6136 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
6137 }
6138 }
6139
6140 // machine dependent adjustments
6141 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
6142
6143 if (!preflight) {
6144 hibernate_stats.cd_count_wire = count_wire;
6145 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
6146 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
6147 }
6148
6149 clock_get_uptime(&end);
6150 absolutetime_to_nanoseconds(end - start, &nsec);
6151 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
6152
6153 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
6154 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
6155 discard_all ? "did" : "could",
6156 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6157
6158 if (hibernate_stats.cd_skipped_xpmapped)
6159 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
6160
6161 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
6162
6163 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
6164
6165 #if MACH_ASSERT || DEBUG
6166 if (!preflight)
6167 {
6168 if (vm_page_local_q) {
6169 for (i = 0; i < vm_page_local_q_count; i++) {
6170 struct vpl *lq;
6171 lq = &vm_page_local_q[i].vpl_un.vpl;
6172 VPL_UNLOCK(&lq->vpl_lock);
6173 }
6174 }
6175 vm_page_unlock_queues();
6176 }
6177 #endif /* MACH_ASSERT || DEBUG */
6178
6179 if (preflight) {
6180 lck_mtx_unlock(&vm_page_queue_free_lock);
6181 vm_page_unlock_queues();
6182 vm_object_unlock(compressor_object);
6183 }
6184
6185 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
6186 }
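/*
 * Illustrative sketch, not xnu code: the bitmap convention the function
 * above relies on.  A clear bit means "this physical page must be written
 * to the hibernation image"; hibernate_page_bitset(..., TRUE, ppnum) marks
 * a page as not needing to be saved.  Per the comment above the function,
 * the "pull wired" loop then ORs page_list with the complement of
 * page_list_wired, leaving page_list to cover only the pageable pages to
 * save.  The real lists are split into per-bank bitmaps; this flat version
 * only shows the bit arithmetic.
 */
#include <stdbool.h>
#include <stdint.h>

#define FAKE_NPAGES 4096

static uint32_t fake_bitmap[FAKE_NPAGES / 32];  /* all-zero: save everything by default */

static void
fake_page_bitset(uint32_t ppnum)                /* "do not save this page" */
{
        fake_bitmap[ppnum >> 5] |= 1u << (ppnum & 31);
}

static bool
fake_page_needs_save(uint32_t ppnum)
{
        return (fake_bitmap[ppnum >> 5] & (1u << (ppnum & 31))) == 0;
}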
6187
6188 void
6189 hibernate_page_list_discard(hibernate_page_list_t * page_list)
6190 {
6191 uint64_t start, end, nsec;
6192 vm_page_t m;
6193 vm_page_t next;
6194 uint32_t i;
6195 uint32_t count_discard_active = 0;
6196 uint32_t count_discard_inactive = 0;
6197 uint32_t count_discard_purgeable = 0;
6198 uint32_t count_discard_cleaned = 0;
6199 uint32_t count_discard_speculative = 0;
6200
6201
6202 #if MACH_ASSERT || DEBUG
6203 vm_page_lock_queues();
6204 if (vm_page_local_q) {
6205 for (i = 0; i < vm_page_local_q_count; i++) {
6206 struct vpl *lq;
6207 lq = &vm_page_local_q[i].vpl_un.vpl;
6208 VPL_LOCK(&lq->vpl_lock);
6209 }
6210 }
6211 #endif /* MACH_ASSERT || DEBUG */
6212
6213 clock_get_uptime(&start);
6214
6215 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
6216 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
6217 {
6218 next = (vm_page_t) m->pageq.next;
6219 if (hibernate_page_bittst(page_list, m->phys_page))
6220 {
6221 if (m->dirty)
6222 count_discard_purgeable++;
6223 else
6224 count_discard_inactive++;
6225 hibernate_discard_page(m);
6226 }
6227 m = next;
6228 }
6229
6230 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6231 {
6232 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
6233 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
6234 {
6235 next = (vm_page_t) m->pageq.next;
6236 if (hibernate_page_bittst(page_list, m->phys_page))
6237 {
6238 count_discard_speculative++;
6239 hibernate_discard_page(m);
6240 }
6241 m = next;
6242 }
6243 }
6244
6245 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6246 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
6247 {
6248 next = (vm_page_t) m->pageq.next;
6249 if (hibernate_page_bittst(page_list, m->phys_page))
6250 {
6251 if (m->dirty)
6252 count_discard_purgeable++;
6253 else
6254 count_discard_inactive++;
6255 hibernate_discard_page(m);
6256 }
6257 m = next;
6258 }
6259
6260 m = (vm_page_t) queue_first(&vm_page_queue_active);
6261 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
6262 {
6263 next = (vm_page_t) m->pageq.next;
6264 if (hibernate_page_bittst(page_list, m->phys_page))
6265 {
6266 if (m->dirty)
6267 count_discard_purgeable++;
6268 else
6269 count_discard_active++;
6270 hibernate_discard_page(m);
6271 }
6272 m = next;
6273 }
6274
6275 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
6276 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
6277 {
6278 next = (vm_page_t) m->pageq.next;
6279 if (hibernate_page_bittst(page_list, m->phys_page))
6280 {
6281 if (m->dirty)
6282 count_discard_purgeable++;
6283 else
6284 count_discard_cleaned++;
6285 hibernate_discard_page(m);
6286 }
6287 m = next;
6288 }
6289
6290 #if MACH_ASSERT || DEBUG
6291 if (vm_page_local_q) {
6292 for (i = 0; i < vm_page_local_q_count; i++) {
6293 struct vpl *lq;
6294 lq = &vm_page_local_q[i].vpl_un.vpl;
6295 VPL_UNLOCK(&lq->vpl_lock);
6296 }
6297 }
6298 vm_page_unlock_queues();
6299 #endif /* MACH_ASSERT || DEBUG */
6300
6301 clock_get_uptime(&end);
6302 absolutetime_to_nanoseconds(end - start, &nsec);
6303 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
6304 nsec / 1000000ULL,
6305 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6306 }
6307
6308 boolean_t hibernate_paddr_map_inited = FALSE;
6309 boolean_t hibernate_rebuild_needed = FALSE;
6310 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
6311 vm_page_t hibernate_rebuild_hash_list = NULL;
6312
6313 unsigned int hibernate_teardown_found_tabled_pages = 0;
6314 unsigned int hibernate_teardown_found_created_pages = 0;
6315 unsigned int hibernate_teardown_found_free_pages = 0;
6316 unsigned int hibernate_teardown_vm_page_free_count;
6317
6318
6319 struct ppnum_mapping {
6320 struct ppnum_mapping *ppnm_next;
6321 ppnum_t ppnm_base_paddr;
6322 unsigned int ppnm_sindx;
6323 unsigned int ppnm_eindx;
6324 };
6325
6326 struct ppnum_mapping *ppnm_head;
6327 struct ppnum_mapping *ppnm_last_found = NULL;
6328
6329
6330 void
6331 hibernate_create_paddr_map()
6332 {
6333 unsigned int i;
6334 ppnum_t next_ppnum_in_run = 0;
6335 struct ppnum_mapping *ppnm = NULL;
6336
6337 if (hibernate_paddr_map_inited == FALSE) {
6338
6339 for (i = 0; i < vm_pages_count; i++) {
6340
6341 if (ppnm)
6342 ppnm->ppnm_eindx = i;
6343
6344 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
6345
6346 ppnm = kalloc(sizeof(struct ppnum_mapping));
6347
6348 ppnm->ppnm_next = ppnm_head;
6349 ppnm_head = ppnm;
6350
6351 ppnm->ppnm_sindx = i;
6352 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
6353 }
6354 next_ppnum_in_run = vm_pages[i].phys_page + 1;
6355 }
6356 ppnm->ppnm_eindx++;
6357
6358 hibernate_paddr_map_inited = TRUE;
6359 }
6360 }
6361
6362 ppnum_t
6363 hibernate_lookup_paddr(unsigned int indx)
6364 {
6365 struct ppnum_mapping *ppnm = NULL;
6366
6367 ppnm = ppnm_last_found;
6368
6369 if (ppnm) {
6370 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
6371 goto done;
6372 }
6373 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
6374
6375 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
6376 ppnm_last_found = ppnm;
6377 break;
6378 }
6379 }
6380 if (ppnm == NULL)
6381 panic("hibernate_lookup_paddr of %d failed\n", indx);
6382 done:
6383 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
6384 }
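/*
 * Illustrative sketch, not xnu code: the run-based index-to-physical-page
 * translation performed by hibernate_lookup_paddr() above.  Each mapping
 * describes a run of vm_pages[] entries whose physical page numbers are
 * consecutive, so a lookup is base + offset within the run.  "fake_run" is
 * a hypothetical stand-in for struct ppnum_mapping.
 */
#include <stddef.h>
#include <stdint.h>

struct fake_run {
        uint32_t     base_paddr;        /* physical page of the run's first entry */
        unsigned int sindx;             /* first vm_pages[] index in the run */
        unsigned int eindx;             /* one past the last index in the run */
};

static uint32_t
fake_lookup_paddr(const struct fake_run *runs, size_t nruns, unsigned int indx)
{
        for (size_t i = 0; i < nruns; i++) {
                if (indx >= runs[i].sindx && indx < runs[i].eindx)
                        return runs[i].base_paddr + (indx - runs[i].sindx);
        }
        return 0;       /* the kernel panics on a miss instead */
}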
6385
6386
6387 uint32_t
6388 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6389 {
6390 addr64_t saddr_aligned;
6391 addr64_t eaddr_aligned;
6392 addr64_t addr;
6393 ppnum_t paddr;
6394 unsigned int mark_as_unneeded_pages = 0;
6395
6396 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
6397 eaddr_aligned = eaddr & ~PAGE_MASK_64;
6398
6399 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
6400
6401 paddr = pmap_find_phys(kernel_pmap, addr);
6402
6403 assert(paddr);
6404
6405 hibernate_page_bitset(page_list, TRUE, paddr);
6406 hibernate_page_bitset(page_list_wired, TRUE, paddr);
6407
6408 mark_as_unneeded_pages++;
6409 }
6410 return (mark_as_unneeded_pages);
6411 }
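/*
 * Illustrative sketch, not xnu code: the page-boundary rounding used above.
 * The start address is rounded up and the end address rounded down, so only
 * pages that lie wholly inside [saddr, eaddr) are marked as unneeded.
 */
#include <stdint.h>

#define FAKE_PAGE_SIZE 4096ULL
#define FAKE_PAGE_MASK (FAKE_PAGE_SIZE - 1)

static uint64_t fake_round_up(uint64_t addr)   { return (addr + FAKE_PAGE_MASK) & ~FAKE_PAGE_MASK; }
static uint64_t fake_round_down(uint64_t addr) { return addr & ~FAKE_PAGE_MASK; }

/*
 * For example, fake_round_up(0x1001) == 0x2000 and fake_round_down(0x2fff)
 * == 0x2000, so the range [0x1001, 0x2fff) contains no whole page and marks
 * nothing at all.
 */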
6412
6413
6414 void
6415 hibernate_hash_insert_page(vm_page_t mem)
6416 {
6417 vm_page_bucket_t *bucket;
6418 int hash_id;
6419
6420 assert(mem->hashed);
6421 assert(mem->object);
6422 assert(mem->offset != (vm_object_offset_t) -1);
6423
6424 /*
6425 * Insert it into the object/offset hash table
6426 */
6427 hash_id = vm_page_hash(mem->object, mem->offset);
6428 bucket = &vm_page_buckets[hash_id];
6429
6430 mem->next_m = bucket->page_list;
6431 bucket->page_list = VM_PAGE_PACK_PTR(mem);
6432 }
6433
6434
6435 void
6436 hibernate_free_range(int sindx, int eindx)
6437 {
6438 vm_page_t mem;
6439 unsigned int color;
6440
6441 while (sindx < eindx) {
6442 mem = &vm_pages[sindx];
6443
6444 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6445
6446 mem->lopage = FALSE;
6447 mem->free = TRUE;
6448
6449 color = mem->phys_page & vm_color_mask;
6450 queue_enter_first(&vm_page_queue_free[color],
6451 mem,
6452 vm_page_t,
6453 pageq);
6454 vm_page_free_count++;
6455
6456 sindx++;
6457 }
6458 }
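/*
 * Illustrative sketch, not xnu code: the page-color bucketing used when the
 * freed ranges above are put back on the free queues.  A page's "color" is
 * simply the low bits of its physical page number, so consecutive physical
 * pages land in different buckets and spread across CPU cache sets.
 */
#include <stdint.h>

#define FAKE_NCOLORS    8               /* must be a power of two */
#define FAKE_COLOR_MASK (FAKE_NCOLORS - 1)

struct fake_fpage {
        uint32_t           phys_page;
        struct fake_fpage *next;
};

static struct fake_fpage *fake_free_head[FAKE_NCOLORS];

static void
fake_free_enqueue_first(struct fake_fpage *p)
{
        unsigned int color = p->phys_page & FAKE_COLOR_MASK;

        p->next = fake_free_head[color];        /* push at the head, like queue_enter_first */
        fake_free_head[color] = p;
}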
6459
6460
6461 extern void hibernate_rebuild_pmap_structs(void);
6462
6463 void
6464 hibernate_rebuild_vm_structs(void)
6465 {
6466 int cindx, sindx, eindx;
6467 vm_page_t mem, tmem, mem_next;
6468 AbsoluteTime startTime, endTime;
6469 uint64_t nsec;
6470
6471 if (hibernate_rebuild_needed == FALSE)
6472 return;
6473
6474 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6475 HIBLOG("hibernate_rebuild started\n");
6476
6477 clock_get_uptime(&startTime);
6478
6479 hibernate_rebuild_pmap_structs();
6480
6481 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6482 eindx = vm_pages_count;
6483
6484 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6485
6486 mem = &vm_pages[cindx];
6487 /*
6488 * hibernate_teardown_vm_structs leaves the location where
6489 * this vm_page_t must be located in "next_m".
6490 */
6491 tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
6492 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6493
6494 sindx = (int)(tmem - &vm_pages[0]);
6495
6496 if (mem != tmem) {
6497 /*
6498 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6499 * so move it back to its real location
6500 */
6501 *tmem = *mem;
6502 mem = tmem;
6503 }
6504 if (mem->hashed)
6505 hibernate_hash_insert_page(mem);
6506 /*
6507 * the 'hole' between this vm_page_t and the previous
6508 * vm_page_t we moved needs to be initialized as
6509 * a range of free vm_page_t's
6510 */
6511 hibernate_free_range(sindx + 1, eindx);
6512
6513 eindx = sindx;
6514 }
6515 if (sindx)
6516 hibernate_free_range(0, sindx);
6517
6518 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6519
6520 /*
6521 * process the list of vm_page_t's that were entered in the hash,
6522 * but were not located in the vm_pages array... these are
6523 * vm_page_t's that were created on the fly (i.e. fictitious)
6524 */
6525 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6526 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6527
6528 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6529 hibernate_hash_insert_page(mem);
6530 }
6531 hibernate_rebuild_hash_list = NULL;
6532
6533 clock_get_uptime(&endTime);
6534 SUB_ABSOLUTETIME(&endTime, &startTime);
6535 absolutetime_to_nanoseconds(endTime, &nsec);
6536
6537 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6538
6539 hibernate_rebuild_needed = FALSE;
6540
6541 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6542 }
6543
6544
6545 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6546
6547 uint32_t
6548 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6549 {
6550 unsigned int i;
6551 unsigned int compact_target_indx;
6552 vm_page_t mem, mem_next;
6553 vm_page_bucket_t *bucket;
6554 unsigned int mark_as_unneeded_pages = 0;
6555 unsigned int unneeded_vm_page_bucket_pages = 0;
6556 unsigned int unneeded_vm_pages_pages = 0;
6557 unsigned int unneeded_pmap_pages = 0;
6558 addr64_t start_of_unneeded = 0;
6559 addr64_t end_of_unneeded = 0;
6560
6561
6562 if (hibernate_should_abort())
6563 return (0);
6564
6565 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6566 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6567 vm_page_cleaned_count, compressor_object->resident_page_count);
6568
6569 for (i = 0; i < vm_page_bucket_count; i++) {
6570
6571 bucket = &vm_page_buckets[i];
6572
6573 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
6574 assert(mem->hashed);
6575
6576 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6577
6578 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6579 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
6580 hibernate_rebuild_hash_list = mem;
6581 }
6582 }
6583 }
6584 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6585 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6586
6587 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6588
6589 compact_target_indx = 0;
6590
6591 for (i = 0; i < vm_pages_count; i++) {
6592
6593 mem = &vm_pages[i];
6594
6595 if (mem->free) {
6596 unsigned int color;
6597
6598 assert(mem->busy);
6599 assert(!mem->lopage);
6600
6601 color = mem->phys_page & vm_color_mask;
6602
6603 queue_remove(&vm_page_queue_free[color],
6604 mem,
6605 vm_page_t,
6606 pageq);
6607 mem->pageq.next = NULL;
6608 mem->pageq.prev = NULL;
6609
6610 vm_page_free_count--;
6611
6612 hibernate_teardown_found_free_pages++;
6613
6614 if ( !vm_pages[compact_target_indx].free)
6615 compact_target_indx = i;
6616 } else {
6617 /*
6618 * record this vm_page_t's original location;
6619 * we need this even if it doesn't get moved,
6620 * as an indicator to the rebuild function that
6621 * it doesn't need to be moved back
6622 */
6623 mem->next_m = VM_PAGE_PACK_PTR(mem);
6624
6625 if (vm_pages[compact_target_indx].free) {
6626 /*
6627 * we've got a hole to fill, so
6628 * move this vm_page_t to its new home
6629 */
6630 vm_pages[compact_target_indx] = *mem;
6631 mem->free = TRUE;
6632
6633 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6634 compact_target_indx++;
6635 } else
6636 hibernate_teardown_last_valid_compact_indx = i;
6637 }
6638 }
6639 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6640 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6641 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6642
6643 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6644
6645 if (start_of_unneeded) {
6646 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6647 mark_as_unneeded_pages += unneeded_pmap_pages;
6648 }
6649 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6650
6651 hibernate_rebuild_needed = TRUE;
6652
6653 return (mark_as_unneeded_pages);
6654 }
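/*
 * Illustrative sketch, not xnu code: the compact-then-restore scheme that
 * hibernate_teardown_vm_structs() and hibernate_rebuild_vm_structs() above
 * implement, in miniature.  Live entries are packed toward the front of the
 * array and each one records its original slot (the role next_m plays
 * above); the rebuild pass walks the packed prefix backwards and copies
 * every entry back where it belongs.
 */
#include <stdbool.h>
#include <stddef.h>

struct fake_entry {
        bool   free;
        size_t home;                    /* original index; stands in for next_m */
        int    payload;
};

/* Pack live entries toward the front; returns the last packed index, or -1. */
static long
fake_compact(struct fake_entry *a, size_t n)
{
        size_t target = 0;              /* candidate slot for the next live entry */
        long   last_valid = -1;

        for (size_t i = 0; i < n; i++) {
                if (a[i].free) {
                        if (!a[target].free)
                                target = i;     /* first hole seen so far */
                        continue;
                }
                a[i].home = i;                  /* remember where this entry belongs */
                if (a[target].free) {
                        a[target] = a[i];       /* fill the hole */
                        a[i].free = true;
                        last_valid = (long)target++;
                } else {
                        last_valid = (long)i;   /* already packed; stays put */
                }
        }
        return last_valid;
}

/* Walk the packed prefix backwards and copy each entry back to its home. */
static void
fake_restore(struct fake_entry *a, long last_valid)
{
        for (long c = last_valid; c >= 0; c--) {
                if (a[c].home != (size_t)c) {
                        a[a[c].home] = a[c];
                        a[c].free = true;       /* the compacted slot becomes free again */
                }
        }
}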
6655
6656
6657 #endif /* HIBERNATION */
6658
6659 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6660
6661 #include <mach_vm_debug.h>
6662 #if MACH_VM_DEBUG
6663
6664 #include <mach_debug/hash_info.h>
6665 #include <vm/vm_debug.h>
6666
6667 /*
6668 * Routine: vm_page_info
6669 * Purpose:
6670 * Return information about the global VP table.
6671 * Fills the buffer with as much information as possible
6672 * and returns the desired size of the buffer.
6673 * Conditions:
6674 * Nothing locked. The caller should provide
6675 * possibly-pageable memory.
6676 */
6677
6678 unsigned int
6679 vm_page_info(
6680 hash_info_bucket_t *info,
6681 unsigned int count)
6682 {
6683 unsigned int i;
6684 lck_spin_t *bucket_lock;
6685
6686 if (vm_page_bucket_count < count)
6687 count = vm_page_bucket_count;
6688
6689 for (i = 0; i < count; i++) {
6690 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6691 unsigned int bucket_count = 0;
6692 vm_page_t m;
6693
6694 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6695 lck_spin_lock(bucket_lock);
6696
6697 for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
6698 bucket_count++;
6699
6700 lck_spin_unlock(bucket_lock);
6701
6702 /* don't touch pageable memory while holding locks */
6703 info[i].hib_count = bucket_count;
6704 }
6705
6706 return vm_page_bucket_count;
6707 }
6708 #endif /* MACH_VM_DEBUG */
6709
6710 #if VM_PAGE_BUCKETS_CHECK
6711 void
6712 vm_page_buckets_check(void)
6713 {
6714 unsigned int i;
6715 vm_page_t p;
6716 unsigned int p_hash;
6717 vm_page_bucket_t *bucket;
6718 lck_spin_t *bucket_lock;
6719
6720 if (!vm_page_buckets_check_ready) {
6721 return;
6722 }
6723
6724 #if HIBERNATION
6725 if (hibernate_rebuild_needed ||
6726 hibernate_rebuild_hash_list) {
6727 panic("BUCKET_CHECK: hibernation in progress: "
6728 "rebuild_needed=%d rebuild_hash_list=%p\n",
6729 hibernate_rebuild_needed,
6730 hibernate_rebuild_hash_list);
6731 }
6732 #endif /* HIBERNATION */
6733
6734 #if VM_PAGE_FAKE_BUCKETS
6735 char *cp;
6736 for (cp = (char *) vm_page_fake_buckets_start;
6737 cp < (char *) vm_page_fake_buckets_end;
6738 cp++) {
6739 if (*cp != 0x5a) {
6740 panic("BUCKET_CHECK: corruption at %p in fake buckets "
6741 "[0x%llx:0x%llx]\n",
6742 cp,
6743 (uint64_t) vm_page_fake_buckets_start,
6744 (uint64_t) vm_page_fake_buckets_end);
6745 }
6746 }
6747 #endif /* VM_PAGE_FAKE_BUCKETS */
6748
6749 for (i = 0; i < vm_page_bucket_count; i++) {
6750 bucket = &vm_page_buckets[i];
6751 if (!bucket->page_list) {
6752 continue;
6753 }
6754
6755 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6756 lck_spin_lock(bucket_lock);
6757 p = VM_PAGE_UNPACK_PTR(bucket->page_list);
6758 while (p != VM_PAGE_NULL) {
6759 if (!p->hashed) {
6760 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
6761 "hash %d in bucket %d at %p "
6762 "is not hashed\n",
6763 p, p->object, p->offset,
6764 p_hash, i, bucket);
6765 }
6766 p_hash = vm_page_hash(p->object, p->offset);
6767 if (p_hash != i) {
6768 panic("BUCKET_CHECK: corruption in bucket %d "
6769 "at %p: page %p object %p offset 0x%llx "
6770 "hash %d\n",
6771 i, bucket, p, p->object, p->offset,
6772 p_hash);
6773 }
6774 p = VM_PAGE_UNPACK_PTR(p->next_m);
6775 }
6776 lck_spin_unlock(bucket_lock);
6777 }
6778
6779 // printf("BUCKET_CHECK: checked buckets\n");
6780 }
6781 #endif /* VM_PAGE_BUCKETS_CHECK */
6782
6783 /*
6784 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
6785 * local queues if they exist... it's the only spot in the system where we add pages
6786 * to those queues... once on those queues, those pages can only move to one of the
6787 * global page queues or the free queues... they NEVER move from local q to local q.
6788 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
6789 * the global vm_page_queue_lock at this point... we still need to take the local lock
6790 * in case this operation is being run on a different CPU than the local queue's identity,
6791 * but we don't have to worry about the page moving to a global queue or becoming wired
6792 * while we're grabbing the local lock since those operations would require the global
6793 * vm_page_queue_lock to be held, and we already own it.
6794 *
6795 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
6796 * 'wired' and local are ALWAYS mutually exclusive conditions.
6797 */
6798 void
6799 vm_page_queues_remove(vm_page_t mem)
6800 {
6801 boolean_t was_pageable;
6802
6803 VM_PAGE_QUEUES_ASSERT(mem, 1);
6804 assert(!mem->pageout_queue);
6805 /*
6806 * if (mem->pageout_queue)
6807 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
6808 * the caller is responsible for determining if the page is on that queue, and if so, must
6809 * either first remove it (it needs both the page queues lock and the object lock to do
6810 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
6811 */
6812 if (mem->local) {
6813 struct vpl *lq;
6814 assert(mem->object != kernel_object);
6815 assert(mem->object != compressor_object);
6816 assert(!mem->inactive && !mem->speculative);
6817 assert(!mem->active && !mem->throttled);
6818 assert(!mem->clean_queue);
6819 assert(!mem->fictitious);
6820 lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
6821 VPL_LOCK(&lq->vpl_lock);
6822 queue_remove(&lq->vpl_queue,
6823 mem, vm_page_t, pageq);
6824 mem->local = FALSE;
6825 mem->local_id = 0;
6826 lq->vpl_count--;
6827 if (mem->object->internal) {
6828 lq->vpl_internal_count--;
6829 } else {
6830 lq->vpl_external_count--;
6831 }
6832 VPL_UNLOCK(&lq->vpl_lock);
6833 was_pageable = FALSE;
6834 }
6835
6836 else if (mem->active) {
6837 assert(mem->object != kernel_object);
6838 assert(mem->object != compressor_object);
6839 assert(!mem->inactive && !mem->speculative);
6840 assert(!mem->clean_queue);
6841 assert(!mem->throttled);
6842 assert(!mem->fictitious);
6843 queue_remove(&vm_page_queue_active,
6844 mem, vm_page_t, pageq);
6845 mem->active = FALSE;
6846 vm_page_active_count--;
6847 was_pageable = TRUE;
6848 }
6849
6850 else if (mem->inactive) {
6851 assert(mem->object != kernel_object);
6852 assert(mem->object != compressor_object);
6853 assert(!mem->active && !mem->speculative);
6854 assert(!mem->throttled);
6855 assert(!mem->fictitious);
6856 vm_page_inactive_count--;
6857 if (mem->clean_queue) {
6858 queue_remove(&vm_page_queue_cleaned,
6859 mem, vm_page_t, pageq);
6860 mem->clean_queue = FALSE;
6861 vm_page_cleaned_count--;
6862 } else {
6863 if (mem->object->internal) {
6864 queue_remove(&vm_page_queue_anonymous,
6865 mem, vm_page_t, pageq);
6866 vm_page_anonymous_count--;
6867 } else {
6868 queue_remove(&vm_page_queue_inactive,
6869 mem, vm_page_t, pageq);
6870 }
6871 vm_purgeable_q_advance_all();
6872 }
6873 mem->inactive = FALSE;
6874 was_pageable = TRUE;
6875 }
6876
6877 else if (mem->throttled) {
6878 assert(mem->object != compressor_object);
6879 assert(!mem->active && !mem->inactive);
6880 assert(!mem->speculative);
6881 assert(!mem->fictitious);
6882 queue_remove(&vm_page_queue_throttled,
6883 mem, vm_page_t, pageq);
6884 mem->throttled = FALSE;
6885 vm_page_throttled_count--;
6886 was_pageable = FALSE;
6887 }
6888
6889 else if (mem->speculative) {
6890 assert(mem->object != compressor_object);
6891 assert(!mem->active && !mem->inactive);
6892 assert(!mem->throttled);
6893 assert(!mem->fictitious);
6894 remque(&mem->pageq);
6895 mem->speculative = FALSE;
6896 vm_page_speculative_count--;
6897 was_pageable = TRUE;
6898 }
6899
6900 else if (mem->pageq.next || mem->pageq.prev) {
6901 was_pageable = FALSE;
6902 panic("vm_page_queues_remove: unmarked page on Q");
6903 } else {
6904 was_pageable = FALSE;
6905 }
6906
6907 mem->pageq.next = NULL;
6908 mem->pageq.prev = NULL;
6909 VM_PAGE_QUEUES_ASSERT(mem, 0);
6910 if (was_pageable) {
6911 if (mem->object->internal) {
6912 vm_page_pageable_internal_count--;
6913 } else {
6914 vm_page_pageable_external_count--;
6915 }
6916 }
6917 }
6918
6919 void
6920 vm_page_remove_internal(vm_page_t page)
6921 {
6922 vm_object_t __object = page->object;
6923 if (page == __object->memq_hint) {
6924 vm_page_t __new_hint;
6925 queue_entry_t __qe;
6926 __qe = queue_next(&page->listq);
6927 if (queue_end(&__object->memq, __qe)) {
6928 __qe = queue_prev(&page->listq);
6929 if (queue_end(&__object->memq, __qe)) {
6930 __qe = NULL;
6931 }
6932 }
6933 __new_hint = (vm_page_t) __qe;
6934 __object->memq_hint = __new_hint;
6935 }
6936 queue_remove(&__object->memq, page, vm_page_t, listq);
6937 }
6938
6939 void
6940 vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
6941 {
6942 VM_PAGE_QUEUES_ASSERT(mem, 0);
6943 assert(!mem->fictitious);
6944 assert(!mem->laundry);
6945 assert(!mem->pageout_queue);
6946 vm_page_check_pageable_safe(mem);
6947 if (mem->object->internal) {
6948 if (first == TRUE)
6949 queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
6950 else
6951 queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
6952 vm_page_anonymous_count++;
6953 vm_page_pageable_internal_count++;
6954 } else {
6955 if (first == TRUE)
6956 queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq);
6957 else
6958 queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);
6959 vm_page_pageable_external_count++;
6960 }
6961 mem->inactive = TRUE;
6962 vm_page_inactive_count++;
6963 token_new_pagecount++;
6964 }
6965
6966 /*
6967 * Pages from special kernel objects shouldn't
6968 * be placed on pageable queues.
6969 */
6970 void
6971 vm_page_check_pageable_safe(vm_page_t page)
6972 {
6973 if (page->object == kernel_object) {
6974 panic("vm_page_check_pageable_safe: trying to add page " \
6975 "from kernel object (%p) to pageable queue", kernel_object);
6976 }
6977
6978 if (page->object == compressor_object) {
6979 panic("vm_page_check_pageable_safe: trying to add page " \
6980 "from compressor object (%p) to pageable queue", compressor_object);
6981 }
6982
6983 if (page->object == vm_submap_object) {
6984 panic("vm_page_check_pageable_safe: trying to add page " \
6985 "from submap object (%p) to pageable queue", vm_submap_object);
6986 }
6987 }
6988
6989 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
6990 * wired page diagnose
6991 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6992
6993 #include <libkern/OSKextLibPrivate.h>
6994
6995 vm_allocation_site_t *
6996 vm_allocation_sites[VM_KERN_MEMORY_COUNT];
6997
6998 vm_tag_t
6999 vm_tag_bt(void)
7000 {
7001 uintptr_t* frameptr;
7002 uintptr_t* frameptr_next;
7003 uintptr_t retaddr;
7004 uintptr_t kstackb, kstackt;
7005 const vm_allocation_site_t * site;
7006 thread_t cthread;
7007
7008 cthread = current_thread();
7009 if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
7010
7011 kstackb = cthread->kernel_stack;
7012 kstackt = kstackb + kernel_stack_size;
7013
7014 /* Load stack frame pointer (EBP on x86) into frameptr */
7015 frameptr = __builtin_frame_address(0);
7016 site = NULL;
7017 while (frameptr != NULL)
7018 {
7019 /* Verify thread stack bounds */
7020 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
7021
7022 /* Next frame pointer is pointed to by the previous one */
7023 frameptr_next = (uintptr_t*) *frameptr;
7024
7025 /* Pull return address from one spot above the frame pointer */
7026 retaddr = *(frameptr + 1);
7027
7028 if ((retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
7029 {
7030 site = OSKextGetAllocationSiteForCaller(retaddr);
7031 break;
7032 }
7033
7034 frameptr = frameptr_next;
7035 }
7036 return (site ? site->tag : VM_KERN_MEMORY_NONE);
7037 }
7038
7039 static uint64_t free_tag_bits[256/64];
7040
7041 void
7042 vm_tag_alloc_locked(vm_allocation_site_t * site)
7043 {
7044 vm_tag_t tag;
7045 uint64_t avail;
7046 uint64_t idx;
7047
7048 if (site->tag) return;
7049
7050 idx = 0;
7051 while (TRUE)
7052 {
7053 avail = free_tag_bits[idx];
7054 if (avail)
7055 {
7056 tag = __builtin_clzll(avail);
7057 avail &= ~(1ULL << (63 - tag));
7058 free_tag_bits[idx] = avail;
7059 tag += (idx << 6);
7060 break;
7061 }
7062 idx++;
7063 if (idx >= (sizeof(free_tag_bits) / sizeof(free_tag_bits[0])))
7064 {
7065 tag = VM_KERN_MEMORY_ANY;
7066 break;
7067 }
7068 }
7069 site->tag = tag;
7070 if (VM_KERN_MEMORY_ANY != tag)
7071 {
7072 assert(!vm_allocation_sites[tag]);
7073 vm_allocation_sites[tag] = site;
7074 }
7075 }
7076
7077 static void
7078 vm_tag_free_locked(vm_tag_t tag)
7079 {
7080 uint64_t avail;
7081 uint32_t idx;
7082 uint64_t bit;
7083
7084 if (VM_KERN_MEMORY_ANY == tag) return;
7085
7086 idx = (tag >> 6);
7087 avail = free_tag_bits[idx];
7088 tag &= 63;
7089 bit = (1ULL << (63 - tag));
7090 assert(!(avail & bit));
7091 free_tag_bits[idx] = (avail | bit);
7092 }
7093
7094 static void
7095 vm_tag_init(void)
7096 {
7097 vm_tag_t tag;
7098 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
7099 {
7100 vm_tag_free_locked(tag);
7101 }
7102 }
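/*
 * Illustrative sketch, not xnu code: the count-leading-zeros bitmap
 * allocator behind vm_tag_alloc_locked()/vm_tag_free_locked() above.  A set
 * bit means "this tag is free"; allocation takes the highest set bit of the
 * first non-empty word, which is exactly the position __builtin_clzll
 * reports.  As with vm_tag_init(), tags must first be released into the
 * pool before they can be handed out.
 */
#include <stdint.h>

#define FAKE_NTAGS 256

static uint64_t fake_free_bits[FAKE_NTAGS / 64];

static int
fake_tag_alloc(void)
{
        for (unsigned int idx = 0; idx < FAKE_NTAGS / 64; idx++) {
                uint64_t avail = fake_free_bits[idx];

                if (avail == 0)
                        continue;
                int bit = __builtin_clzll(avail);       /* 0 means bit 63 is set */
                fake_free_bits[idx] = avail & ~(1ULL << (63 - bit));
                return (int)(idx << 6) + bit;
        }
        return -1;      /* pool exhausted; the kernel falls back to VM_KERN_MEMORY_ANY */
}

static void
fake_tag_free(int tag)
{
        fake_free_bits[tag >> 6] |= 1ULL << (63 - (tag & 63));
}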
7103
7104 vm_tag_t
7105 vm_tag_alloc(vm_allocation_site_t * site)
7106 {
7107 vm_tag_t tag;
7108
7109 if (VM_TAG_BT & site->flags)
7110 {
7111 tag = vm_tag_bt();
7112 if (VM_KERN_MEMORY_NONE != tag) return (tag);
7113 }
7114
7115 if (!site->tag)
7116 {
7117 lck_spin_lock(&vm_allocation_sites_lock);
7118 vm_tag_alloc_locked(site);
7119 lck_spin_unlock(&vm_allocation_sites_lock);
7120 }
7121
7122 return (site->tag);
7123 }
7124
7125 static void
7126 vm_page_count_object(mach_memory_info_t * sites, unsigned int __unused num_sites, vm_object_t object)
7127 {
7128 if (!object->wired_page_count) return;
7129 if (object != kernel_object)
7130 {
7131 assert(object->wire_tag < num_sites);
7132 sites[object->wire_tag].size += ptoa_64(object->wired_page_count);
7133 }
7134 }
7135
7136 typedef void (*vm_page_iterate_proc)(mach_memory_info_t * sites,
7137 unsigned int num_sites, vm_object_t object);
7138
7139 static void
7140 vm_page_iterate_purgeable_objects(mach_memory_info_t * sites, unsigned int num_sites,
7141 vm_page_iterate_proc proc, purgeable_q_t queue,
7142 int group)
7143 {
7144 vm_object_t object;
7145
7146 for (object = (vm_object_t) queue_first(&queue->objq[group]);
7147 !queue_end(&queue->objq[group], (queue_entry_t) object);
7148 object = (vm_object_t) queue_next(&object->objq))
7149 {
7150 proc(sites, num_sites, object);
7151 }
7152 }
7153
7154 static void
7155 vm_page_iterate_objects(mach_memory_info_t * sites, unsigned int num_sites,
7156 vm_page_iterate_proc proc)
7157 {
7158 purgeable_q_t volatile_q;
7159 queue_head_t * nonvolatile_q;
7160 vm_object_t object;
7161 int group;
7162
7163 lck_spin_lock(&vm_objects_wired_lock);
7164 queue_iterate(&vm_objects_wired,
7165 object,
7166 vm_object_t,
7167 objq)
7168 {
7169 proc(sites, num_sites, object);
7170 }
7171 lck_spin_unlock(&vm_objects_wired_lock);
7172
7173 lck_mtx_lock(&vm_purgeable_queue_lock);
7174 nonvolatile_q = &purgeable_nonvolatile_queue;
7175 for (object = (vm_object_t) queue_first(nonvolatile_q);
7176 !queue_end(nonvolatile_q, (queue_entry_t) object);
7177 object = (vm_object_t) queue_next(&object->objq))
7178 {
7179 proc(sites, num_sites, object);
7180 }
7181
7182 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
7183 vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, 0);
7184
7185 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
7186 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
7187 {
7188 vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
7189 }
7190
7191 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
7192 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
7193 {
7194 vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
7195 }
7196 lck_mtx_unlock(&vm_purgeable_queue_lock);
7197 }
7198
7199 static uint64_t
7200 process_account(mach_memory_info_t * sites, unsigned int __unused num_sites)
7201 {
7202 uint64_t found;
7203 unsigned int idx;
7204 vm_allocation_site_t * site;
7205
7206 assert(num_sites >= VM_KERN_MEMORY_COUNT);
7207 found = 0;
7208 for (idx = 0; idx < VM_KERN_MEMORY_COUNT; idx++)
7209 {
7210 found += sites[idx].size;
7211 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
7212 {
7213 sites[idx].site = idx;
7214 sites[idx].flags |= VM_KERN_SITE_TAG;
7215 if (VM_KERN_MEMORY_ZONE == idx) sites[idx].flags |= VM_KERN_SITE_HIDE;
7216 else sites[idx].flags |= VM_KERN_SITE_WIRED;
7217 continue;
7218 }
7219 lck_spin_lock(&vm_allocation_sites_lock);
7220 if ((site = vm_allocation_sites[idx]))
7221 {
7222 if (sites[idx].size)
7223 {
7224 sites[idx].flags |= VM_KERN_SITE_WIRED;
7225 if (VM_TAG_KMOD == (VM_KERN_SITE_TYPE & site->flags))
7226 {
7227 sites[idx].site = OSKextGetKmodIDForSite(site);
7228 sites[idx].flags |= VM_KERN_SITE_KMOD;
7229 }
7230 else
7231 {
7232 sites[idx].site = VM_KERNEL_UNSLIDE(site);
7233 sites[idx].flags |= VM_KERN_SITE_KERNEL;
7234 }
7235 site = NULL;
7236 }
7237 else
7238 {
7239 #if 1
7240 site = NULL;
7241 #else
7242 /* this code would free a site with no allocations but can race a new
7243 * allocation being made */
7244 vm_tag_free_locked(site->tag);
7245 site->tag = VM_KERN_MEMORY_NONE;
7246 vm_allocation_sites[idx] = NULL;
7247 if (!(VM_TAG_UNLOAD & site->flags)) site = NULL;
7248 #endif
7249 }
7250 }
7251 lck_spin_unlock(&vm_allocation_sites_lock);
7252 if (site) OSKextFreeSite(site);
7253 }
7254 return (found);
7255 }
7256
7257 kern_return_t
7258 vm_page_diagnose(mach_memory_info_t * sites, unsigned int num_sites)
7259 {
7260 enum { kMaxKernelDepth = 1 };
7261 vm_map_t maps [kMaxKernelDepth];
7262 vm_map_entry_t entries[kMaxKernelDepth];
7263 vm_map_t map;
7264 vm_map_entry_t entry;
7265 vm_object_offset_t offset;
7266 vm_page_t page;
7267 int stackIdx, count;
7268 uint64_t wired_size;
7269 uint64_t wired_managed_size;
7270 uint64_t wired_reserved_size;
7271 mach_memory_info_t * counts;
7272
7273 bzero(sites, num_sites * sizeof(mach_memory_info_t));
7274
7275 vm_page_iterate_objects(sites, num_sites, &vm_page_count_object);
7276
7277 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
7278 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
7279 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
7280
7281 assert(num_sites >= (VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT));
7282 counts = &sites[VM_KERN_MEMORY_COUNT];
7283
7284 #define SET_COUNT(xcount, xsize, xflags) \
7285 counts[xcount].site = (xcount); \
7286 counts[xcount].size = (xsize); \
7287 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
7288
7289 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
7290 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
7291 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
7292 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
7293 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
7294 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
7295
7296 #define SET_MAP(xcount, xsize, xfree, xlargest) \
7297 counts[xcount].site = (xcount); \
7298 counts[xcount].size = (xsize); \
7299 counts[xcount].free = (xfree); \
7300 counts[xcount].largest = (xlargest); \
7301 counts[xcount].flags = VM_KERN_SITE_COUNTER;
7302
7303 vm_map_size_t map_size, map_free, map_largest;
7304
7305 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
7306 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
7307
7308 vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
7309 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
7310
7311 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
7312 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
7313
7314 map = kernel_map;
7315 stackIdx = 0;
7316 while (map)
7317 {
7318 vm_map_lock(map);
7319 for (entry = map->hdr.links.next; map; entry = entry->links.next)
7320 {
7321 if (entry->is_sub_map)
7322 {
7323 assert(stackIdx < kMaxKernelDepth);
7324 maps[stackIdx] = map;
7325 entries[stackIdx] = entry;
7326 stackIdx++;
7327 map = VME_SUBMAP(entry);
7328 entry = NULL;
7329 break;
7330 }
7331 if (VME_OBJECT(entry) == kernel_object)
7332 {
7333 count = 0;
7334 vm_object_lock(VME_OBJECT(entry));
7335 for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
7336 {
7337 page = vm_page_lookup(VME_OBJECT(entry), offset);
7338 if (page && VM_PAGE_WIRED(page)) count++;
7339 }
7340 vm_object_unlock(VME_OBJECT(entry));
7341
7342 if (count)
7343 {
7344 assert(VME_ALIAS(entry) < num_sites);
7345 sites[VME_ALIAS(entry)].size += ptoa_64(count);
7346 }
7347 }
7348 if (entry == vm_map_last_entry(map))
7349 {
7350 vm_map_unlock(map);
7351 if (!stackIdx) map = NULL;
7352 else
7353 {
7354 --stackIdx;
7355 map = maps[stackIdx];
7356 entry = entries[stackIdx];
7357 }
7358 }
7359 }
7360 }
7361
7362 process_account(sites, num_sites);
7363
7364 return (KERN_SUCCESS);
7365 }
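/*
 * Illustrative sketch, not xnu code: the explicit-stack descent that
 * vm_page_diagnose() above uses to walk kernel_map and its submaps without
 * recursion.  When an entry refers to a submap, the current map and entry
 * are pushed, the walk restarts inside the submap, and the saved position
 * is popped once the submap's last entry has been visited.  The "fake_*"
 * types are hypothetical stand-ins for vm_map_t / vm_map_entry_t.
 */
#include <stddef.h>

#define FAKE_MAX_DEPTH 1                /* cf. kMaxKernelDepth */

struct fake_map;

struct fake_entry {
        struct fake_entry *next;        /* NULL terminates a map's entry list */
        struct fake_map   *submap;      /* non-NULL: descend into this map */
        int                value;
};

struct fake_map {
        struct fake_entry *first;
};

static long
fake_walk(struct fake_map *root)
{
        struct fake_map   *maps[FAKE_MAX_DEPTH];
        struct fake_entry *entries[FAKE_MAX_DEPTH];
        struct fake_map   *map = root;
        struct fake_entry *entry = map->first;
        int  depth = 0;
        long sum = 0;

        while (entry) {
                if (entry->submap && entry->submap->first && depth < FAKE_MAX_DEPTH) {
                        maps[depth] = map;              /* push where we came from */
                        entries[depth] = entry;
                        depth++;
                        map = entry->submap;
                        entry = map->first;             /* restart inside the submap */
                        continue;
                }
                sum += entry->value;
                while (entry->next == NULL && depth > 0) {
                        depth--;                        /* pop back to the parent map */
                        map = maps[depth];
                        entry = entries[depth];
                }
                entry = entry->next;
        }
        return sum;
}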