1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67 #include <libkern/OSDebug.h>
68
69 #include <mach/clock_types.h>
70 #include <mach/vm_prot.h>
71 #include <mach/vm_statistics.h>
72 #include <mach/sdt.h>
73 #include <kern/counters.h>
74 #include <kern/sched_prim.h>
75 #include <kern/task.h>
76 #include <kern/thread.h>
77 #include <kern/kalloc.h>
78 #include <kern/zalloc.h>
79 #include <kern/xpr.h>
80 #include <kern/ledger.h>
81 #include <vm/pmap.h>
82 #include <vm/vm_init.h>
83 #include <vm/vm_map.h>
84 #include <vm/vm_page.h>
85 #include <vm/vm_pageout.h>
86 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
87 #include <kern/misc_protos.h>
88 #include <zone_debug.h>
89 #include <mach_debug/zone_info.h>
90 #include <vm/cpm.h>
91 #include <pexpert/pexpert.h>
92
93 #include <vm/vm_protos.h>
94 #include <vm/memory_object.h>
95 #include <vm/vm_purgeable_internal.h>
96 #include <vm/vm_compressor.h>
97
98 #if CONFIG_PHANTOM_CACHE
99 #include <vm/vm_phantom_cache.h>
100 #endif
101
102 #include <IOKit/IOHibernatePrivate.h>
103
104 #include <sys/kdebug.h>
105
106 boolean_t hibernate_cleaning_in_progress = FALSE;
107 boolean_t vm_page_free_verify = TRUE;
108
109 uint32_t vm_lopage_free_count = 0;
110 uint32_t vm_lopage_free_limit = 0;
111 uint32_t vm_lopage_lowater = 0;
112 boolean_t vm_lopage_refill = FALSE;
113 boolean_t vm_lopage_needed = FALSE;
114
115 lck_mtx_ext_t vm_page_queue_lock_ext;
116 lck_mtx_ext_t vm_page_queue_free_lock_ext;
117 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
118
119 int speculative_age_index = 0;
120 int speculative_steal_index = 0;
121 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
122
123
124 __private_extern__ void vm_page_init_lck_grp(void);
125
126 static void vm_page_free_prepare(vm_page_t page);
127 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
128
129 static void vm_tag_init(void);
130
131 uint64_t vm_min_kernel_and_kext_address = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
132
133 /*
134 * Associated with each page of user-allocatable memory is a
135 * page structure.
136 */
137
138 /*
139 * These variables record the values returned by vm_page_bootstrap,
140 * for debugging purposes. The implementation of pmap_steal_memory
141 * and pmap_startup here also uses them internally.
142 */
143
144 vm_offset_t virtual_space_start;
145 vm_offset_t virtual_space_end;
146 uint32_t vm_page_pages;
147
148 /*
149 * The vm_page_lookup() routine, which provides for fast
150 * (virtual memory object, offset) to page lookup, employs
151 * the following hash table. The vm_page_{insert,remove}
152 * routines install and remove associations in the table.
153 * [This table is often called the virtual-to-physical,
154 * or VP, table.]
155 */
156 typedef struct {
157 vm_page_packed_t page_list;
158 #if MACH_PAGE_HASH_STATS
159 int cur_count; /* current count */
160 int hi_count; /* high water mark */
161 #endif /* MACH_PAGE_HASH_STATS */
162 } vm_page_bucket_t;
163
164
165 #define BUCKETS_PER_LOCK 16
166
167 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
168 unsigned int vm_page_bucket_count = 0; /* How big is array? */
169 unsigned int vm_page_hash_mask; /* Mask for hash function */
170 unsigned int vm_page_hash_shift; /* Shift for hash function */
171 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
172 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
173
174 lck_spin_t *vm_page_bucket_locks;
175 lck_spin_t vm_objects_wired_lock;
176 lck_spin_t vm_allocation_sites_lock;
177
178 #if VM_PAGE_BUCKETS_CHECK
179 boolean_t vm_page_buckets_check_ready = FALSE;
180 #if VM_PAGE_FAKE_BUCKETS
181 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
182 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
183 #endif /* VM_PAGE_FAKE_BUCKETS */
184 #endif /* VM_PAGE_BUCKETS_CHECK */
185
186 extern int not_in_kdp;
187
188
189 #if MACH_PAGE_HASH_STATS
190 /* This routine is only for debug. It is intended to be called by
191 * hand by a developer using a kernel debugger. This routine prints
192 * out vm_page_hash table statistics to the kernel debug console.
193 */
194 void
195 hash_debug(void)
196 {
197 int i;
198 int numbuckets = 0;
199 int highsum = 0;
200 int maxdepth = 0;
201
202 for (i = 0; i < vm_page_bucket_count; i++) {
203 if (vm_page_buckets[i].hi_count) {
204 numbuckets++;
205 highsum += vm_page_buckets[i].hi_count;
206 if (vm_page_buckets[i].hi_count > maxdepth)
207 maxdepth = vm_page_buckets[i].hi_count;
208 }
209 }
210 printf("Total number of buckets: %d\n", vm_page_bucket_count);
211 printf("Number used buckets: %d = %d%%\n",
212 numbuckets, 100*numbuckets/vm_page_bucket_count);
213 printf("Number unused buckets: %d = %d%%\n",
214 vm_page_bucket_count - numbuckets,
215 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
216 printf("Sum of bucket max depth: %d\n", highsum);
217 printf("Average bucket depth: %d.%2d\n",
218 highsum/vm_page_bucket_count,
219 highsum%vm_page_bucket_count);
220 printf("Maximum bucket depth: %d\n", maxdepth);
221 }
222 #endif /* MACH_PAGE_HASH_STATS */
223
224 /*
225 * The virtual page size is currently implemented as a runtime
226 * variable, but is constant once initialized using vm_set_page_size.
227 * This initialization must be done in the machine-dependent
228 * bootstrap sequence, before calling other machine-independent
229 * initializations.
230 *
231 * All references to the virtual page size outside this
232 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
233 * constants.
234 */
235 vm_size_t page_size = PAGE_SIZE;
236 vm_size_t page_mask = PAGE_MASK;
237 int page_shift = PAGE_SHIFT;
238
239 /*
240 * Resident page structures are initialized from
241 * a template (see vm_page_alloc).
242 *
243 * When adding a new field to the virtual memory
244 * object structure, be sure to add initialization
245 * (see vm_page_bootstrap).
246 */
247 struct vm_page vm_page_template;
248
249 vm_page_t vm_pages = VM_PAGE_NULL;
250 unsigned int vm_pages_count = 0;
251 ppnum_t vm_page_lowest = 0;
252
253 /*
254 * Resident pages that represent real memory
255 * are allocated from a set of free lists,
256 * one per color.
257 */
258 unsigned int vm_colors;
259 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
260 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
261 unsigned int vm_free_magazine_refill_limit = 0;
262 queue_head_t vm_page_queue_free[MAX_COLORS];
263 unsigned int vm_page_free_wanted;
264 unsigned int vm_page_free_wanted_privileged;
265 unsigned int vm_page_free_count;
266 unsigned int vm_page_fictitious_count;
267
268 /*
269 * Occasionally, the virtual memory system uses
270 * resident page structures that do not refer to
271 * real pages, for example to leave a page with
272 * important state information in the VP table.
273 *
274 * These page structures are allocated the way
275 * most other kernel structures are.
276 */
277 zone_t vm_page_zone;
278 vm_locks_array_t vm_page_locks;
279 decl_lck_mtx_data(,vm_page_alloc_lock)
280 lck_mtx_ext_t vm_page_alloc_lock_ext;
281
282 unsigned int io_throttle_zero_fill;
283
284 unsigned int vm_page_local_q_count = 0;
285 unsigned int vm_page_local_q_soft_limit = 250;
286 unsigned int vm_page_local_q_hard_limit = 500;
287 struct vplq *vm_page_local_q = NULL;
288
289 /* N.B. Guard and fictitious pages must not
290 * be assigned a zero phys_page value.
291 */
292 /*
293 * Fictitious pages don't have a physical address,
294 * but we must initialize phys_page to something.
295 * For debugging, this should be a strange value
296 * that the pmap module can recognize in assertions.
297 */
298 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
299
300 /*
301 * Guard pages are not accessible so they don't
302 * need a physical address, but we need to enter
303 * one in the pmap.
304 * Let's make it recognizable and make sure that
305 * we don't use a real physical page with that
306 * physical address.
307 */
308 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
309
310 /*
311 * Resident page structures are also chained on
312 * queues that are used by the page replacement
313 * system (pageout daemon). These queues are
314 * defined here, but are shared by the pageout
315 * module. The inactive queue is broken into
316 * file backed and anonymous for convenience as the
317 * pageout daemon often assigns a higher
318 * importance to anonymous pages (they are less likely to be picked)
319 */
320 queue_head_t vm_page_queue_active;
321 queue_head_t vm_page_queue_inactive;
322 queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
323 queue_head_t vm_page_queue_throttled;
324
325 queue_head_t vm_objects_wired;
326
327 unsigned int vm_page_active_count;
328 unsigned int vm_page_inactive_count;
329 unsigned int vm_page_anonymous_count;
330 unsigned int vm_page_throttled_count;
331 unsigned int vm_page_speculative_count;
332
333 unsigned int vm_page_wire_count;
334 unsigned int vm_page_stolen_count;
335 unsigned int vm_page_wire_count_initial;
336 unsigned int vm_page_pages_initial;
337 unsigned int vm_page_gobble_count = 0;
338
339 #define VM_PAGE_WIRE_COUNT_WARNING 0
340 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
341
342 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
343 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
344 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
345
346 unsigned int vm_page_xpmapped_external_count = 0;
347 unsigned int vm_page_external_count = 0;
348 unsigned int vm_page_internal_count = 0;
349 unsigned int vm_page_pageable_external_count = 0;
350 unsigned int vm_page_pageable_internal_count = 0;
351
352 #if DEVELOPMENT || DEBUG
353 unsigned int vm_page_speculative_recreated = 0;
354 unsigned int vm_page_speculative_created = 0;
355 unsigned int vm_page_speculative_used = 0;
356 #endif
357
358 queue_head_t vm_page_queue_cleaned;
359
360 unsigned int vm_page_cleaned_count = 0;
361 unsigned int vm_pageout_enqueued_cleaned = 0;
362
363 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
364 ppnum_t max_valid_low_ppnum = 0xffffffff;
365
366
367 /*
368 * Several page replacement parameters are also
369 * shared with this module, so that page allocation
370 * (done here in vm_page_alloc) can trigger the
371 * pageout daemon.
372 */
373 unsigned int vm_page_free_target = 0;
374 unsigned int vm_page_free_min = 0;
375 unsigned int vm_page_throttle_limit = 0;
376 unsigned int vm_page_inactive_target = 0;
377 unsigned int vm_page_anonymous_min = 0;
378 unsigned int vm_page_inactive_min = 0;
379 unsigned int vm_page_free_reserved = 0;
380 unsigned int vm_page_throttle_count = 0;
381
382
383 /*
384 * The VM system has a couple of heuristics for deciding
385 * that pages are "uninteresting" and should be placed
386 * on the inactive queue as likely candidates for replacement.
387 * These variables let the heuristics be controlled at run-time
388 * to make experimentation easier.
389 */
390
391 boolean_t vm_page_deactivate_hint = TRUE;
392
393 struct vm_page_stats_reusable vm_page_stats_reusable;
394
395 /*
396 * vm_set_page_size:
397 *
398 * Sets the page size, perhaps based upon the memory
399 * size. Must be called before any use of page-size
400 * dependent functions.
401 *
402 * Sets page_shift and page_mask from page_size.
403 */
404 void
405 vm_set_page_size(void)
406 {
407 page_size = PAGE_SIZE;
408 page_mask = PAGE_MASK;
409 page_shift = PAGE_SHIFT;
410
411 if ((page_mask & page_size) != 0)
412 panic("vm_set_page_size: page size not a power of two");
413
414 for (page_shift = 0; ; page_shift++)
415 if ((1U << page_shift) == page_size)
416 break;
417 }
418
419 #define COLOR_GROUPS_TO_STEAL 4
420
421
422 /* Called once during startup, once the cache geometry is known.
423 */
424 static void
425 vm_page_set_colors( void )
426 {
427 unsigned int n, override;
428
429 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
430 n = override;
431 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
432 n = vm_cache_geometry_colors;
433 else n = DEFAULT_COLORS; /* use default if all else fails */
434
435 if ( n == 0 )
436 n = 1;
437 if ( n > MAX_COLORS )
438 n = MAX_COLORS;
439
440 /* the count must be a power of 2 */
441 if ( ( n & (n - 1)) != 0 )
442 panic("vm_page_set_colors");
443
444 vm_colors = n;
445 vm_color_mask = n - 1;
446
447 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
448 }
449
450
451 lck_grp_t vm_page_lck_grp_free;
452 lck_grp_t vm_page_lck_grp_queue;
453 lck_grp_t vm_page_lck_grp_local;
454 lck_grp_t vm_page_lck_grp_purge;
455 lck_grp_t vm_page_lck_grp_alloc;
456 lck_grp_t vm_page_lck_grp_bucket;
457 lck_grp_attr_t vm_page_lck_grp_attr;
458 lck_attr_t vm_page_lck_attr;
459
460
461 __private_extern__ void
462 vm_page_init_lck_grp(void)
463 {
464 /*
465 * initialize the vm_page lock world
466 */
467 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
468 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
469 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
470 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
471 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
472 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
473 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
474 lck_attr_setdefault(&vm_page_lck_attr);
475 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
476
477 vm_compressor_init_locks();
478 }
479
480 void
481 vm_page_init_local_q()
482 {
483 unsigned int num_cpus;
484 unsigned int i;
485 struct vplq *t_local_q;
486
487 num_cpus = ml_get_max_cpus();
488
489 /*
490 * no point in this for a uni-processor system
491 */
492 if (num_cpus >= 2) {
493 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
494
495 for (i = 0; i < num_cpus; i++) {
496 struct vpl *lq;
497
498 lq = &t_local_q[i].vpl_un.vpl;
499 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
500 queue_init(&lq->vpl_queue);
501 lq->vpl_count = 0;
502 lq->vpl_internal_count = 0;
503 lq->vpl_external_count = 0;
504 }
505 vm_page_local_q_count = num_cpus;
506
507 vm_page_local_q = (struct vplq *)t_local_q;
508 }
509 }
510
511
512 /*
513 * vm_page_bootstrap:
514 *
515 * Initializes the resident memory module.
516 *
517 * Allocates memory for the page cells, and
518 * for the object/offset-to-page hash table headers.
519 * Each page cell is initialized and placed on the free list.
520 * Returns the range of available kernel virtual memory.
521 */
522
523 void
524 vm_page_bootstrap(
525 vm_offset_t *startp,
526 vm_offset_t *endp)
527 {
528 register vm_page_t m;
529 unsigned int i;
530 unsigned int log1;
531 unsigned int log2;
532 unsigned int size;
533
534 /*
535 * Initialize the vm_page template.
536 */
537
538 m = &vm_page_template;
539 bzero(m, sizeof (*m));
540
541 m->pageq.next = NULL;
542 m->pageq.prev = NULL;
543 m->listq.next = NULL;
544 m->listq.prev = NULL;
545 m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
546
547 m->object = VM_OBJECT_NULL; /* reset later */
548 m->offset = (vm_object_offset_t) -1; /* reset later */
549
550 m->wire_count = 0;
551 m->local = FALSE;
552 m->inactive = FALSE;
553 m->active = FALSE;
554 m->pageout_queue = FALSE;
555 m->speculative = FALSE;
556 m->laundry = FALSE;
557 m->free = FALSE;
558 m->reference = FALSE;
559 m->gobbled = FALSE;
560 m->private = FALSE;
561 m->throttled = FALSE;
562 m->__unused_pageq_bits = 0;
563
564 m->phys_page = 0; /* reset later */
565
566 m->busy = TRUE;
567 m->wanted = FALSE;
568 m->tabled = FALSE;
569 m->hashed = FALSE;
570 m->fictitious = FALSE;
571 m->pmapped = FALSE;
572 m->wpmapped = FALSE;
573 m->pageout = FALSE;
574 m->absent = FALSE;
575 m->error = FALSE;
576 m->dirty = FALSE;
577 m->cleaning = FALSE;
578 m->precious = FALSE;
579 m->clustered = FALSE;
580 m->overwriting = FALSE;
581 m->restart = FALSE;
582 m->unusual = FALSE;
583 m->encrypted = FALSE;
584 m->encrypted_cleaning = FALSE;
585 m->cs_validated = FALSE;
586 m->cs_tainted = FALSE;
587 m->cs_nx = FALSE;
588 m->no_cache = FALSE;
589 m->reusable = FALSE;
590 m->slid = FALSE;
591 m->xpmapped = FALSE;
592 m->compressor = FALSE;
593 m->written_by_kernel = FALSE;
594 m->__unused_object_bits = 0;
595
596 /*
597 * Initialize the page queues.
598 */
599 vm_page_init_lck_grp();
600
601 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
602 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
603 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
604
605 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
606 int group;
607
608 purgeable_queues[i].token_q_head = 0;
609 purgeable_queues[i].token_q_tail = 0;
610 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
611 queue_init(&purgeable_queues[i].objq[group]);
612
613 purgeable_queues[i].type = i;
614 purgeable_queues[i].new_pages = 0;
615 #if MACH_ASSERT
616 purgeable_queues[i].debug_count_tokens = 0;
617 purgeable_queues[i].debug_count_objects = 0;
618 #endif
619 };
620 purgeable_nonvolatile_count = 0;
621 queue_init(&purgeable_nonvolatile_queue);
622
623 for (i = 0; i < MAX_COLORS; i++ )
624 queue_init(&vm_page_queue_free[i]);
625
626 queue_init(&vm_lopage_queue_free);
627 queue_init(&vm_page_queue_active);
628 queue_init(&vm_page_queue_inactive);
629 queue_init(&vm_page_queue_cleaned);
630 queue_init(&vm_page_queue_throttled);
631 queue_init(&vm_page_queue_anonymous);
632 queue_init(&vm_objects_wired);
633
634 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
635 queue_init(&vm_page_queue_speculative[i].age_q);
636
637 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
638 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
639 }
640 vm_page_free_wanted = 0;
641 vm_page_free_wanted_privileged = 0;
642
643 vm_page_set_colors();
644
645
646 /*
647 * Steal memory for the map and zone subsystems.
648 */
649 kernel_debug_string_simple("zone_steal_memory");
650 zone_steal_memory();
651 kernel_debug_string_simple("vm_map_steal_memory");
652 vm_map_steal_memory();
653
654 /*
655 * Allocate (and initialize) the virtual-to-physical
656 * table hash buckets.
657 *
658 * The number of buckets should be a power of two to
659 * get a good hash function. The following computation
660 * chooses the first power of two that is greater
661 * than the number of physical pages in the system.
662 */
663
664 if (vm_page_bucket_count == 0) {
665 unsigned int npages = pmap_free_pages();
666
667 vm_page_bucket_count = 1;
668 while (vm_page_bucket_count < npages)
669 vm_page_bucket_count <<= 1;
670 }
671 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
672
673 vm_page_hash_mask = vm_page_bucket_count - 1;
674
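/*
 * Illustrative sizing (hypothetical numbers, not taken from this code):
 * with 4 KB pages and roughly 4 GB of free memory, pmap_free_pages()
 * returns a little under 2^20, so the loop above picks
 * vm_page_bucket_count = 2^20 (1048576), vm_page_hash_mask = 0xFFFFF,
 * and with BUCKETS_PER_LOCK == 16 there are 65536 bucket locks.
 */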
675 /*
676 * Calculate object shift value for hashing algorithm:
677 * O = log2(sizeof(struct vm_object))
678 * B = log2(vm_page_bucket_count)
679 * hash shifts the object left by
680 * B/2 - O
681 */
682 size = vm_page_bucket_count;
683 for (log1 = 0; size > 1; log1++)
684 size /= 2;
685 size = sizeof(struct vm_object);
686 for (log2 = 0; size > 1; log2++)
687 size /= 2;
688 vm_page_hash_shift = log1/2 - log2 + 1;
689
690 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
691 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
692 vm_page_bucket_hash |= 1; /* Set bit and add 1 - always must be 1 to ensure a unique series */
693
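/*
 * Worked example of the hash parameters (hypothetical sizes): with
 * vm_page_bucket_count = 2^20 (log1 = 20) and sizeof(struct vm_object)
 * = 256 (log2 = 8), the code above yields
 *	vm_page_hash_shift  = 20/2 - 8 + 1 = 3
 *	vm_page_bucket_hash = (1 << 10) | (1 << 5) | 1 = 0x421
 */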
694 if (vm_page_hash_mask & vm_page_bucket_count)
695 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
696
697 #if VM_PAGE_BUCKETS_CHECK
698 #if VM_PAGE_FAKE_BUCKETS
699 /*
700 * Allocate a decoy set of page buckets, to detect
701 * any stomping there.
702 */
703 vm_page_fake_buckets = (vm_page_bucket_t *)
704 pmap_steal_memory(vm_page_bucket_count *
705 sizeof(vm_page_bucket_t));
706 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
707 vm_page_fake_buckets_end =
708 vm_map_round_page((vm_page_fake_buckets_start +
709 (vm_page_bucket_count *
710 sizeof (vm_page_bucket_t))),
711 PAGE_MASK);
712 char *cp;
713 for (cp = (char *)vm_page_fake_buckets_start;
714 cp < (char *)vm_page_fake_buckets_end;
715 cp++) {
716 *cp = 0x5a;
717 }
718 #endif /* VM_PAGE_FAKE_BUCKETS */
719 #endif /* VM_PAGE_BUCKETS_CHECK */
720
721 kernel_debug_string_simple("vm_page_buckets");
722 vm_page_buckets = (vm_page_bucket_t *)
723 pmap_steal_memory(vm_page_bucket_count *
724 sizeof(vm_page_bucket_t));
725
726 kernel_debug_string_simple("vm_page_bucket_locks");
727 vm_page_bucket_locks = (lck_spin_t *)
728 pmap_steal_memory(vm_page_bucket_lock_count *
729 sizeof(lck_spin_t));
730
731 for (i = 0; i < vm_page_bucket_count; i++) {
732 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
733
734 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
735 #if MACH_PAGE_HASH_STATS
736 bucket->cur_count = 0;
737 bucket->hi_count = 0;
738 #endif /* MACH_PAGE_HASH_STATS */
739 }
740
741 for (i = 0; i < vm_page_bucket_lock_count; i++)
742 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
743
744 lck_spin_init(&vm_objects_wired_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
745 lck_spin_init(&vm_allocation_sites_lock, &vm_page_lck_grp_bucket, &vm_page_lck_attr);
746 vm_tag_init();
747
748 #if VM_PAGE_BUCKETS_CHECK
749 vm_page_buckets_check_ready = TRUE;
750 #endif /* VM_PAGE_BUCKETS_CHECK */
751
752 /*
753 * Machine-dependent code allocates the resident page table.
754 * It uses vm_page_init to initialize the page frames.
755 * The code also returns to us the virtual space available
756 * to the kernel. We don't trust the pmap module
757 * to get the alignment right.
758 */
759
760 kernel_debug_string_simple("pmap_startup");
761 pmap_startup(&virtual_space_start, &virtual_space_end);
762 virtual_space_start = round_page(virtual_space_start);
763 virtual_space_end = trunc_page(virtual_space_end);
764
765 *startp = virtual_space_start;
766 *endp = virtual_space_end;
767
768 /*
769 * Compute the initial "wire" count.
770 * Up until now, the pages which have been set aside are not under
771 * the VM system's control, so although they aren't explicitly
772 * wired, they nonetheless can't be moved. At this moment,
773 * all VM managed pages are "free", courtesy of pmap_startup.
774 */
775 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
776 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
777 vm_page_wire_count_initial = vm_page_wire_count;
778 vm_page_pages_initial = vm_page_pages;
779
780 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
781 vm_page_free_count, vm_page_wire_count);
782
783 kernel_debug_string_simple("vm_page_bootstrap complete");
784 simple_lock_init(&vm_paging_lock, 0);
785 }
786
787 #ifndef MACHINE_PAGES
788 /*
789 * We implement pmap_steal_memory and pmap_startup with the help
790 * of two simpler functions, pmap_virtual_space and pmap_next_page.
791 */
792
793 void *
794 pmap_steal_memory(
795 vm_size_t size)
796 {
797 vm_offset_t addr, vaddr;
798 ppnum_t phys_page;
799
800 /*
801 * We round the size up to a multiple of sizeof(void *).
802 */
803
804 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
805
806 /*
807 * If this is the first call to pmap_steal_memory,
808 * we have to initialize ourself.
809 */
810
811 if (virtual_space_start == virtual_space_end) {
812 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
813
814 /*
815 * The initial values must be aligned properly, and
816 * we don't trust the pmap module to do it right.
817 */
818
819 virtual_space_start = round_page(virtual_space_start);
820 virtual_space_end = trunc_page(virtual_space_end);
821 }
822
823 /*
824 * Allocate virtual memory for this request.
825 */
826
827 addr = virtual_space_start;
828 virtual_space_start += size;
829
830 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
831
832 /*
833 * Allocate and map physical pages to back new virtual pages.
834 */
835
836 for (vaddr = round_page(addr);
837 vaddr < addr + size;
838 vaddr += PAGE_SIZE) {
839
840 if (!pmap_next_page_hi(&phys_page))
841 panic("pmap_steal_memory");
842
843 /*
844 * XXX Logically, these mappings should be wired,
845 * but some pmap modules barf if they are.
846 */
847 #if defined(__LP64__)
848 pmap_pre_expand(kernel_pmap, vaddr);
849 #endif
850
851 pmap_enter(kernel_pmap, vaddr, phys_page,
852 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
853 VM_WIMG_USE_DEFAULT, FALSE);
854 /*
855 * Account for newly stolen memory
856 */
857 vm_page_wire_count++;
858 vm_page_stolen_count++;
859 }
860
861 return (void *) addr;
862 }
863
864 void vm_page_release_startup(vm_page_t mem);
865 void
866 pmap_startup(
867 vm_offset_t *startp,
868 vm_offset_t *endp)
869 {
870 unsigned int i, npages, pages_initialized, fill, fillval;
871 ppnum_t phys_page;
872 addr64_t tmpaddr;
873
874
875 #if defined(__LP64__)
876 /*
877 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
878 */
879 assert(sizeof(struct vm_page) == 64);
880
881 /*
882 * make sure we are aligned on a 64 byte boundary
883 * for VM_PAGE_PACK_PTR (it clips off the low-order
884 * 6 bits of the pointer)
885 */
886 if (virtual_space_start != virtual_space_end)
887 virtual_space_start = round_page(virtual_space_start);
888 #endif
889
890 /*
891 * We calculate how many page frames we will have
892 * and then allocate the page structures in one chunk.
893 */
894
895 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
896 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
897 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* How many pages fit, counting space for both the pages themselves and their vm_page_ts */
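/*
 * Rough arithmetic (illustrative): each managed page needs PAGE_SIZE
 * bytes of physical memory plus sizeof(struct vm_page) bytes for its
 * descriptor (64 on LP64, per the assert above), so with 4 KB pages
 * npages comes out to approximately tmpaddr / 4160.
 */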
898
899 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
900
901 /*
902 * Initialize the page frames.
903 */
904 kernel_debug_string_simple("Initialize the page frames");
905 for (i = 0, pages_initialized = 0; i < npages; i++) {
906 if (!pmap_next_page(&phys_page))
907 break;
908 if (pages_initialized == 0 || phys_page < vm_page_lowest)
909 vm_page_lowest = phys_page;
910
911 vm_page_init(&vm_pages[i], phys_page, FALSE);
912 vm_page_pages++;
913 pages_initialized++;
914 }
915 vm_pages_count = pages_initialized;
916
917 #if defined(__LP64__)
918
919 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
920 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
921
922 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
923 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
924 #endif
925 kernel_debug_string_simple("page fill/release");
926 /*
927 * Check if we want to initialize pages to a known value
928 */
929 fill = 0; /* Assume no fill */
930 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
931 #if DEBUG
932 /* This slows down booting the DEBUG kernel, particularly on
933 * large memory systems, but is worthwhile in deterministically
934 * trapping uninitialized memory usage.
935 */
936 if (fill == 0) {
937 fill = 1;
938 fillval = 0xDEB8F177;
939 }
940 #endif
941 if (fill)
942 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
943 // -debug code remove
944 if (2 == vm_himemory_mode) {
945 // free low -> high so high is preferred
946 for (i = 1; i <= pages_initialized; i++) {
947 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
948 vm_page_release_startup(&vm_pages[i - 1]);
949 }
950 }
951 else
952 // debug code remove-
953
954 /*
955 * Release pages in reverse order so that physical pages
956 * initially get allocated in ascending addresses. This keeps
957 * the devices (which must address physical memory) happy if
958 * they require several consecutive pages.
959 */
960 for (i = pages_initialized; i > 0; i--) {
961 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
962 vm_page_release_startup(&vm_pages[i - 1]);
963 }
964
965 VM_CHECK_MEMORYSTATUS;
966
967 #if 0
968 {
969 vm_page_t xx, xxo, xxl;
970 int i, j, k, l;
971
972 j = 0; /* (BRINGUP) */
973 xxl = 0;
974
975 for( i = 0; i < vm_colors; i++ ) {
976 queue_iterate(&vm_page_queue_free[i],
977 xx,
978 vm_page_t,
979 pageq) { /* BRINGUP */
980 j++; /* (BRINGUP) */
981 if(j > vm_page_free_count) { /* (BRINGUP) */
982 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
983 }
984
985 l = vm_page_free_count - j; /* (BRINGUP) */
986 k = 0; /* (BRINGUP) */
987
988 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
989
990 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
991 k++;
992 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
993 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
994 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
995 }
996 }
997
998 xxl = xx;
999 }
1000 }
1001
1002 if(j != vm_page_free_count) { /* (BRINGUP) */
1003 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
1004 }
1005 }
1006 #endif
1007
1008
1009 /*
1010 * We have to re-align virtual_space_start,
1011 * because pmap_steal_memory has been using it.
1012 */
1013
1014 virtual_space_start = round_page(virtual_space_start);
1015
1016 *startp = virtual_space_start;
1017 *endp = virtual_space_end;
1018 }
1019 #endif /* MACHINE_PAGES */
1020
1021 /*
1022 * Routine: vm_page_module_init
1023 * Purpose:
1024 * Second initialization pass, to be done after
1025 * the basic VM system is ready.
1026 */
1027 void
1028 vm_page_module_init(void)
1029 {
1030 uint64_t vm_page_zone_pages, vm_page_zone_data_size;
1031 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
1032 0, PAGE_SIZE, "vm pages");
1033
1034 #if ZONE_DEBUG
1035 zone_debug_disable(vm_page_zone);
1036 #endif /* ZONE_DEBUG */
1037
1038 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1039 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1040 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1041 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1042 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1043 /*
1044 * Adjust zone statistics to account for the real pages allocated
1045 * in vm_page_create(). [Q: is this really what we want?]
1046 */
1047 vm_page_zone->count += vm_page_pages;
1048 vm_page_zone->sum_count += vm_page_pages;
1049 vm_page_zone_data_size = vm_page_pages * vm_page_zone->elem_size;
1050 vm_page_zone->cur_size += vm_page_zone_data_size;
1051 vm_page_zone_pages = ((round_page(vm_page_zone_data_size)) / PAGE_SIZE);
1052 OSAddAtomic64(vm_page_zone_pages, &(vm_page_zone->page_count));
1053 /* since zone accounts for these, take them out of stolen */
1054 VM_PAGE_MOVE_STOLEN(vm_page_zone_pages);
1055 }
1056
1057 /*
1058 * Routine: vm_page_create
1059 * Purpose:
1060 * After the VM system is up, machine-dependent code
1061 * may stumble across more physical memory. For example,
1062 * memory that it was reserving for a frame buffer.
1063 * vm_page_create turns this memory into available pages.
1064 */
1065
1066 void
1067 vm_page_create(
1068 ppnum_t start,
1069 ppnum_t end)
1070 {
1071 ppnum_t phys_page;
1072 vm_page_t m;
1073
1074 for (phys_page = start;
1075 phys_page < end;
1076 phys_page++) {
1077 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1078 == VM_PAGE_NULL)
1079 vm_page_more_fictitious();
1080
1081 m->fictitious = FALSE;
1082 pmap_clear_noencrypt(phys_page);
1083
1084 vm_page_pages++;
1085 vm_page_release(m);
1086 }
1087 }
1088
1089 /*
1090 * vm_page_hash:
1091 *
1092 * Distributes the object/offset key pair among hash buckets.
1093 *
1094 * NOTE: The bucket count must be a power of 2
1095 */
1096 #define vm_page_hash(object, offset) (\
1097 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1098 & vm_page_hash_mask)
1099
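/*
 * Typical use of the hash (see vm_page_insert_internal() and
 * vm_page_lookup() below): the hash id selects both the bucket and,
 * via BUCKETS_PER_LOCK, the spin lock that guards it:
 *
 *	hash_id     = vm_page_hash(object, offset);
 *	bucket      = &vm_page_buckets[hash_id];
 *	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 */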
1100
1101 /*
1102 * vm_page_insert: [ internal use only ]
1103 *
1104 * Inserts the given mem entry into the object/object-page
1105 * table and object list.
1106 *
1107 * The object must be locked.
1108 */
1109 void
1110 vm_page_insert(
1111 vm_page_t mem,
1112 vm_object_t object,
1113 vm_object_offset_t offset)
1114 {
1115 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, TRUE, FALSE, FALSE, NULL);
1116 }
1117
1118 void
1119 vm_page_insert_wired(
1120 vm_page_t mem,
1121 vm_object_t object,
1122 vm_object_offset_t offset,
1123 vm_tag_t tag)
1124 {
1125 vm_page_insert_internal(mem, object, offset, tag, FALSE, TRUE, FALSE, FALSE, NULL);
1126 }
1127
1128 void
1129 vm_page_insert_internal(
1130 vm_page_t mem,
1131 vm_object_t object,
1132 vm_object_offset_t offset,
1133 vm_tag_t tag,
1134 boolean_t queues_lock_held,
1135 boolean_t insert_in_hash,
1136 boolean_t batch_pmap_op,
1137 boolean_t batch_accounting,
1138 uint64_t *delayed_ledger_update)
1139 {
1140 vm_page_bucket_t *bucket;
1141 lck_spin_t *bucket_lock;
1142 int hash_id;
1143 task_t owner;
1144
1145 XPR(XPR_VM_PAGE,
1146 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1147 object, offset, mem, 0,0);
1148 #if 0
1149 /*
1150 * we may not hold the page queue lock
1151 * so this check isn't safe to make
1152 */
1153 VM_PAGE_CHECK(mem);
1154 #endif
1155
1156 assert(page_aligned(offset));
1157
1158 assert(!VM_PAGE_WIRED(mem) || mem->private || mem->fictitious || (tag != VM_KERN_MEMORY_NONE));
1159
1160 /* the vm_submap_object is only a placeholder for submaps */
1161 assert(object != vm_submap_object);
1162
1163 vm_object_lock_assert_exclusive(object);
1164 #if DEBUG
1165 lck_mtx_assert(&vm_page_queue_lock,
1166 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1167 : LCK_MTX_ASSERT_NOTOWNED);
1168 #endif /* DEBUG */
1169
1170 if (insert_in_hash == TRUE) {
1171 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1172 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1173 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1174 "already in (obj=%p,off=0x%llx)",
1175 mem, object, offset, mem->object, mem->offset);
1176 #endif
1177 assert(!object->internal || offset < object->vo_size);
1178
1179 /* only insert "pageout" pages into "pageout" objects,
1180 * and normal pages into normal objects */
1181 #if 00
1182 /*
1183 * For some reason, this assertion gets tripped
1184 * but it's mostly harmless, so let's disable it
1185 * for now.
1186 */
1187 assert(object->pageout == mem->pageout);
1188 #endif /* 00 */
1189
1190 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1191
1192 /*
1193 * Record the object/offset pair in this page
1194 */
1195
1196 mem->object = object;
1197 mem->offset = offset;
1198
1199 /*
1200 * Insert it into the object_object/offset hash table
1201 */
1202 hash_id = vm_page_hash(object, offset);
1203 bucket = &vm_page_buckets[hash_id];
1204 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1205
1206 lck_spin_lock(bucket_lock);
1207
1208 mem->next_m = bucket->page_list;
1209 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1210 assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));
1211
1212 #if MACH_PAGE_HASH_STATS
1213 if (++bucket->cur_count > bucket->hi_count)
1214 bucket->hi_count = bucket->cur_count;
1215 #endif /* MACH_PAGE_HASH_STATS */
1216 mem->hashed = TRUE;
1217 lck_spin_unlock(bucket_lock);
1218 }
1219
1220 {
1221 unsigned int cache_attr;
1222
1223 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1224
1225 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1226 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1227 }
1228 }
1229 /*
1230 * Now link into the object's list of backed pages.
1231 */
1232 queue_enter(&object->memq, mem, vm_page_t, listq);
1233 object->memq_hint = mem;
1234 mem->tabled = TRUE;
1235
1236 /*
1237 * Show that the object has one more resident page.
1238 */
1239
1240 object->resident_page_count++;
1241 if (VM_PAGE_WIRED(mem)) {
1242 if (!mem->private && !mem->fictitious)
1243 {
1244 if (!object->wired_page_count)
1245 {
1246 assert(VM_KERN_MEMORY_NONE != tag);
1247 object->wire_tag = tag;
1248 VM_OBJECT_WIRED(object);
1249 }
1250 }
1251 object->wired_page_count++;
1252 }
1253 assert(object->resident_page_count >= object->wired_page_count);
1254
1255 if (batch_accounting == FALSE) {
1256 if (object->internal) {
1257 OSAddAtomic(1, &vm_page_internal_count);
1258 } else {
1259 OSAddAtomic(1, &vm_page_external_count);
1260 }
1261 }
1262
1263 /*
1264 * It wouldn't make sense to insert a "reusable" page in
1265 * an object (the page would have been marked "reusable" only
1266 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1267 * in the object at that time).
1268 * But a page could be inserted in an "all_reusable" object, if
1269 * something faults it in (a vm_read() from another task or a
1270 * "use-after-free" issue in user space, for example). It can
1271 * also happen if we're relocating a page from that object to
1272 * a different physical page during a physically-contiguous
1273 * allocation.
1274 */
1275 assert(!mem->reusable);
1276 if (mem->object->all_reusable) {
1277 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1278 }
1279
1280 if (object->purgable == VM_PURGABLE_DENY) {
1281 owner = TASK_NULL;
1282 } else {
1283 owner = object->vo_purgeable_owner;
1284 }
1285 if (owner &&
1286 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1287 VM_PAGE_WIRED(mem))) {
1288
1289 if (delayed_ledger_update)
1290 *delayed_ledger_update += PAGE_SIZE;
1291 else {
1292 /* more non-volatile bytes */
1293 ledger_credit(owner->ledger,
1294 task_ledgers.purgeable_nonvolatile,
1295 PAGE_SIZE);
1296 /* more footprint */
1297 ledger_credit(owner->ledger,
1298 task_ledgers.phys_footprint,
1299 PAGE_SIZE);
1300 }
1301
1302 } else if (owner &&
1303 (object->purgable == VM_PURGABLE_VOLATILE ||
1304 object->purgable == VM_PURGABLE_EMPTY)) {
1305 assert(! VM_PAGE_WIRED(mem));
1306 /* more volatile bytes */
1307 ledger_credit(owner->ledger,
1308 task_ledgers.purgeable_volatile,
1309 PAGE_SIZE);
1310 }
1311
1312 if (object->purgable == VM_PURGABLE_VOLATILE) {
1313 if (VM_PAGE_WIRED(mem)) {
1314 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1315 } else {
1316 OSAddAtomic(+1, &vm_page_purgeable_count);
1317 }
1318 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1319 mem->throttled) {
1320 /*
1321 * This page belongs to a purged VM object but hasn't
1322 * been purged (because it was "busy").
1323 * It's in the "throttled" queue and hence not
1324 * visible to vm_pageout_scan(). Move it to a pageable
1325 * queue, so that it can eventually be reclaimed, instead
1326 * of lingering in the "empty" object.
1327 */
1328 if (queues_lock_held == FALSE)
1329 vm_page_lockspin_queues();
1330 vm_page_deactivate(mem);
1331 if (queues_lock_held == FALSE)
1332 vm_page_unlock_queues();
1333 }
1334
1335 #if VM_OBJECT_TRACKING_OP_MODIFIED
1336 if (vm_object_tracking_inited &&
1337 object->internal &&
1338 object->resident_page_count == 0 &&
1339 object->pager == NULL &&
1340 object->shadow != NULL &&
1341 object->shadow->copy == object) {
1342 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1343 int numsaved = 0;
1344
1345 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1346 btlog_add_entry(vm_object_tracking_btlog,
1347 object,
1348 VM_OBJECT_TRACKING_OP_MODIFIED,
1349 bt,
1350 numsaved);
1351 }
1352 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1353 }
1354
1355 /*
1356 * vm_page_replace:
1357 *
1358 * Exactly like vm_page_insert, except that we first
1359 * remove any existing page at the given offset in object.
1360 *
1361 * The object must be locked.
1362 */
1363 void
1364 vm_page_replace(
1365 register vm_page_t mem,
1366 register vm_object_t object,
1367 register vm_object_offset_t offset)
1368 {
1369 vm_page_bucket_t *bucket;
1370 vm_page_t found_m = VM_PAGE_NULL;
1371 lck_spin_t *bucket_lock;
1372 int hash_id;
1373
1374 #if 0
1375 /*
1376 * we don't hold the page queue lock
1377 * so this check isn't safe to make
1378 */
1379 VM_PAGE_CHECK(mem);
1380 #endif
1381 vm_object_lock_assert_exclusive(object);
1382 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1383 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1384 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1385 "already in (obj=%p,off=0x%llx)",
1386 mem, object, offset, mem->object, mem->offset);
1387 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1388 #endif
1389 /*
1390 * Record the object/offset pair in this page
1391 */
1392
1393 mem->object = object;
1394 mem->offset = offset;
1395
1396 /*
1397 * Insert it into the object_object/offset hash table,
1398 * replacing any page that might have been there.
1399 */
1400
1401 hash_id = vm_page_hash(object, offset);
1402 bucket = &vm_page_buckets[hash_id];
1403 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1404
1405 lck_spin_lock(bucket_lock);
1406
1407 if (bucket->page_list) {
1408 vm_page_packed_t *mp = &bucket->page_list;
1409 vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);
1410
1411 do {
1412 if (m->object == object && m->offset == offset) {
1413 /*
1414 * Remove old page from hash list
1415 */
1416 *mp = m->next_m;
1417 m->hashed = FALSE;
1418
1419 found_m = m;
1420 break;
1421 }
1422 mp = &m->next_m;
1423 } while ((m = VM_PAGE_UNPACK_PTR(*mp)));
1424
1425 mem->next_m = bucket->page_list;
1426 } else {
1427 mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1428 }
1429 /*
1430 * insert new page at head of hash list
1431 */
1432 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1433 mem->hashed = TRUE;
1434
1435 lck_spin_unlock(bucket_lock);
1436
1437 if (found_m) {
1438 /*
1439 * there was already a page at the specified
1440 * offset for this object... remove it from
1441 * the object and free it back to the free list
1442 */
1443 vm_page_free_unlocked(found_m, FALSE);
1444 }
1445 vm_page_insert_internal(mem, object, offset, VM_KERN_MEMORY_NONE, FALSE, FALSE, FALSE, FALSE, NULL);
1446 }
1447
1448 /*
1449 * vm_page_remove: [ internal use only ]
1450 *
1451 * Removes the given mem entry from the object/offset-page
1452 * table and the object page list.
1453 *
1454 * The object must be locked.
1455 */
1456
1457 void
1458 vm_page_remove(
1459 vm_page_t mem,
1460 boolean_t remove_from_hash)
1461 {
1462 vm_page_bucket_t *bucket;
1463 vm_page_t this;
1464 lck_spin_t *bucket_lock;
1465 int hash_id;
1466 task_t owner;
1467
1468 XPR(XPR_VM_PAGE,
1469 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1470 mem->object, mem->offset,
1471 mem, 0,0);
1472
1473 vm_object_lock_assert_exclusive(mem->object);
1474 assert(mem->tabled);
1475 assert(!mem->cleaning);
1476 assert(!mem->laundry);
1477 #if 0
1478 /*
1479 * we don't hold the page queue lock
1480 * so this check isn't safe to make
1481 */
1482 VM_PAGE_CHECK(mem);
1483 #endif
1484 if (remove_from_hash == TRUE) {
1485 /*
1486 * Remove from the object_object/offset hash table
1487 */
1488 hash_id = vm_page_hash(mem->object, mem->offset);
1489 bucket = &vm_page_buckets[hash_id];
1490 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1491
1492 lck_spin_lock(bucket_lock);
1493
1494 if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
1495 /* optimize for common case */
1496
1497 bucket->page_list = mem->next_m;
1498 } else {
1499 vm_page_packed_t *prev;
1500
1501 for (prev = &this->next_m;
1502 (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
1503 prev = &this->next_m)
1504 continue;
1505 *prev = this->next_m;
1506 }
1507 #if MACH_PAGE_HASH_STATS
1508 bucket->cur_count--;
1509 #endif /* MACH_PAGE_HASH_STATS */
1510 mem->hashed = FALSE;
1511 lck_spin_unlock(bucket_lock);
1512 }
1513 /*
1514 * Now remove from the object's list of backed pages.
1515 */
1516
1517 vm_page_remove_internal(mem);
1518
1519 /*
1520 * And show that the object has one fewer resident
1521 * page.
1522 */
1523
1524 assert(mem->object->resident_page_count > 0);
1525 mem->object->resident_page_count--;
1526
1527 if (mem->object->internal) {
1528 #if DEBUG
1529 assert(vm_page_internal_count);
1530 #endif /* DEBUG */
1531
1532 OSAddAtomic(-1, &vm_page_internal_count);
1533 } else {
1534 assert(vm_page_external_count);
1535 OSAddAtomic(-1, &vm_page_external_count);
1536
1537 if (mem->xpmapped) {
1538 assert(vm_page_xpmapped_external_count);
1539 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1540 }
1541 }
1542 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1543 if (mem->object->resident_page_count == 0)
1544 vm_object_cache_remove(mem->object);
1545 }
1546
1547 if (VM_PAGE_WIRED(mem)) {
1548 assert(mem->object->wired_page_count > 0);
1549 mem->object->wired_page_count--;
1550 if (!mem->object->wired_page_count) {
1551 VM_OBJECT_UNWIRED(mem->object);
1552 }
1553 }
1554 assert(mem->object->resident_page_count >=
1555 mem->object->wired_page_count);
1556 if (mem->reusable) {
1557 assert(mem->object->reusable_page_count > 0);
1558 mem->object->reusable_page_count--;
1559 assert(mem->object->reusable_page_count <=
1560 mem->object->resident_page_count);
1561 mem->reusable = FALSE;
1562 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1563 vm_page_stats_reusable.reused_remove++;
1564 } else if (mem->object->all_reusable) {
1565 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1566 vm_page_stats_reusable.reused_remove++;
1567 }
1568
1569 if (mem->object->purgable == VM_PURGABLE_DENY) {
1570 owner = TASK_NULL;
1571 } else {
1572 owner = mem->object->vo_purgeable_owner;
1573 }
1574 if (owner &&
1575 (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
1576 VM_PAGE_WIRED(mem))) {
1577 /* less non-volatile bytes */
1578 ledger_debit(owner->ledger,
1579 task_ledgers.purgeable_nonvolatile,
1580 PAGE_SIZE);
1581 /* less footprint */
1582 ledger_debit(owner->ledger,
1583 task_ledgers.phys_footprint,
1584 PAGE_SIZE);
1585 } else if (owner &&
1586 (mem->object->purgable == VM_PURGABLE_VOLATILE ||
1587 mem->object->purgable == VM_PURGABLE_EMPTY)) {
1588 assert(! VM_PAGE_WIRED(mem));
1589 /* less volatile bytes */
1590 ledger_debit(owner->ledger,
1591 task_ledgers.purgeable_volatile,
1592 PAGE_SIZE);
1593 }
1594 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1595 if (VM_PAGE_WIRED(mem)) {
1596 assert(vm_page_purgeable_wired_count > 0);
1597 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1598 } else {
1599 assert(vm_page_purgeable_count > 0);
1600 OSAddAtomic(-1, &vm_page_purgeable_count);
1601 }
1602 }
1603 if (mem->object->set_cache_attr == TRUE)
1604 pmap_set_cache_attributes(mem->phys_page, 0);
1605
1606 mem->tabled = FALSE;
1607 mem->object = VM_OBJECT_NULL;
1608 mem->offset = (vm_object_offset_t) -1;
1609 }
1610
1611
1612 /*
1613 * vm_page_lookup:
1614 *
1615 * Returns the page associated with the object/offset
1616 * pair specified; if none is found, VM_PAGE_NULL is returned.
1617 *
1618 * The object must be locked. No side effects.
1619 */
1620
1621 #define VM_PAGE_HASH_LOOKUP_THRESHOLD 10
1622
1623 #if DEBUG_VM_PAGE_LOOKUP
1624
1625 struct {
1626 uint64_t vpl_total;
1627 uint64_t vpl_empty_obj;
1628 uint64_t vpl_bucket_NULL;
1629 uint64_t vpl_hit_hint;
1630 uint64_t vpl_hit_hint_next;
1631 uint64_t vpl_hit_hint_prev;
1632 uint64_t vpl_fast;
1633 uint64_t vpl_slow;
1634 uint64_t vpl_hit;
1635 uint64_t vpl_miss;
1636
1637 uint64_t vpl_fast_elapsed;
1638 uint64_t vpl_slow_elapsed;
1639 } vm_page_lookup_stats __attribute__((aligned(8)));
1640
1641 #endif
1642
1643 #define KDP_VM_PAGE_WALK_MAX 1000
1644
1645 vm_page_t
1646 kdp_vm_page_lookup(
1647 vm_object_t object,
1648 vm_object_offset_t offset)
1649 {
1650 vm_page_t cur_page;
1651 int num_traversed = 0;
1652
1653 if (not_in_kdp) {
1654 panic("panic: kdp_vm_page_lookup done outside of kernel debugger");
1655 }
1656
1657 queue_iterate(&object->memq, cur_page, vm_page_t, listq) {
1658 if (cur_page->offset == offset) {
1659 return cur_page;
1660 }
1661 num_traversed++;
1662
1663 if (num_traversed >= KDP_VM_PAGE_WALK_MAX) {
1664 return VM_PAGE_NULL;
1665 }
1666 }
1667
1668 return VM_PAGE_NULL;
1669 }
1670
1671 vm_page_t
1672 vm_page_lookup(
1673 vm_object_t object,
1674 vm_object_offset_t offset)
1675 {
1676 vm_page_t mem;
1677 vm_page_bucket_t *bucket;
1678 queue_entry_t qe;
1679 lck_spin_t *bucket_lock = NULL;
1680 int hash_id;
1681 #if DEBUG_VM_PAGE_LOOKUP
1682 uint64_t start, elapsed;
1683
1684 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_total);
1685 #endif
1686 vm_object_lock_assert_held(object);
1687
1688 if (object->resident_page_count == 0) {
1689 #if DEBUG_VM_PAGE_LOOKUP
1690 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_empty_obj);
1691 #endif
1692 return (VM_PAGE_NULL);
1693 }
1694
1695 mem = object->memq_hint;
1696
1697 if (mem != VM_PAGE_NULL) {
1698 assert(mem->object == object);
1699
1700 if (mem->offset == offset) {
1701 #if DEBUG_VM_PAGE_LOOKUP
1702 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint);
1703 #endif
1704 return (mem);
1705 }
1706 qe = queue_next(&mem->listq);
1707
1708 if (! queue_end(&object->memq, qe)) {
1709 vm_page_t next_page;
1710
1711 next_page = (vm_page_t) qe;
1712 assert(next_page->object == object);
1713
1714 if (next_page->offset == offset) {
1715 object->memq_hint = next_page; /* new hint */
1716 #if DEBUG_VM_PAGE_LOOKUP
1717 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_next);
1718 #endif
1719 return (next_page);
1720 }
1721 }
1722 qe = queue_prev(&mem->listq);
1723
1724 if (! queue_end(&object->memq, qe)) {
1725 vm_page_t prev_page;
1726
1727 prev_page = (vm_page_t) qe;
1728 assert(prev_page->object == object);
1729
1730 if (prev_page->offset == offset) {
1731 object->memq_hint = prev_page; /* new hint */
1732 #if DEBUG_VM_PAGE_LOOKUP
1733 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit_hint_prev);
1734 #endif
1735 return (prev_page);
1736 }
1737 }
1738 }
1739 /*
1740 * Search the hash table for this object/offset pair
1741 */
1742 hash_id = vm_page_hash(object, offset);
1743 bucket = &vm_page_buckets[hash_id];
1744
1745 /*
1746 * since we hold the object lock, we are guaranteed that no
1747 * new pages can be inserted into this object... this in turn
1748 * guarantees that the page we're looking for can't exist
1749 * if the bucket it hashes to is currently NULL even when looked
1750 * at outside the scope of the hash bucket lock... this is a
1751 * really cheap optimization to avoid taking the lock
1752 */
1753 if (!bucket->page_list) {
1754 #if DEBUG_VM_PAGE_LOOKUP
1755 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_bucket_NULL);
1756 #endif
1757 return (VM_PAGE_NULL);
1758 }
1759
1760 #if DEBUG_VM_PAGE_LOOKUP
1761 start = mach_absolute_time();
1762 #endif
1763 if (object->resident_page_count <= VM_PAGE_HASH_LOOKUP_THRESHOLD) {
1764 /*
1765 * on average, it's roughly 3 times faster to run a short memq list
1766 * than to take the spin lock and go through the hash list
1767 */
1768 mem = (vm_page_t)queue_first(&object->memq);
1769
1770 while (!queue_end(&object->memq, (queue_entry_t)mem)) {
1771
1772 if (mem->offset == offset)
1773 break;
1774
1775 mem = (vm_page_t)queue_next(&mem->listq);
1776 }
1777 if (queue_end(&object->memq, (queue_entry_t)mem))
1778 mem = NULL;
1779 } else {
1780
1781 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1782
1783 lck_spin_lock(bucket_lock);
1784
1785 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
1786 #if 0
1787 /*
1788 * we don't hold the page queue lock
1789 * so this check isn't safe to make
1790 */
1791 VM_PAGE_CHECK(mem);
1792 #endif
1793 if ((mem->object == object) && (mem->offset == offset))
1794 break;
1795 }
1796 lck_spin_unlock(bucket_lock);
1797 }
1798
1799 #if DEBUG_VM_PAGE_LOOKUP
1800 elapsed = mach_absolute_time() - start;
1801
1802 if (bucket_lock) {
1803 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_slow);
1804 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_slow_elapsed);
1805 } else {
1806 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_fast);
1807 OSAddAtomic64(elapsed, &vm_page_lookup_stats.vpl_fast_elapsed);
1808 }
1809 if (mem != VM_PAGE_NULL)
1810 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_hit);
1811 else
1812 OSAddAtomic64(1, &vm_page_lookup_stats.vpl_miss);
1813 #endif
1814 if (mem != VM_PAGE_NULL) {
1815 assert(mem->object == object);
1816
1817 object->memq_hint = mem;
1818 }
1819 return (mem);
1820 }
1821
1822
1823 /*
1824 * vm_page_rename:
1825 *
1826 * Move the given memory entry from its
1827 * current object to the specified target object/offset.
1828 *
1829 * The object must be locked.
1830 */
1831 void
1832 vm_page_rename(
1833 register vm_page_t mem,
1834 register vm_object_t new_object,
1835 vm_object_offset_t new_offset,
1836 boolean_t encrypted_ok)
1837 {
1838 boolean_t internal_to_external, external_to_internal;
1839 vm_tag_t tag;
1840
1841 assert(mem->object != new_object);
1842
1843 assert(mem->object);
1844
1845 /*
1846 * ENCRYPTED SWAP:
1847 * The encryption key is based on the page's memory object
1848 * (aka "pager") and paging offset. Moving the page to
1849 * another VM object changes its "pager" and "paging_offset"
1850 * so it has to be decrypted first, or we would lose the key.
1851 *
1852 * One exception is VM object collapsing, where we transfer pages
1853 * from one backing object to its parent object. This operation also
1854 * transfers the paging information, so the <pager,paging_offset> info
1855 * should remain consistent. The caller (vm_object_do_collapse())
1856 * sets "encrypted_ok" in this case.
1857 */
1858 if (!encrypted_ok && mem->encrypted) {
1859 panic("vm_page_rename: page %p is encrypted\n", mem);
1860 }
1861
1862 XPR(XPR_VM_PAGE,
1863 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1864 new_object, new_offset,
1865 mem, 0,0);
1866
1867 /*
1868 * Changes to mem->object require the page queues lock because
1869 * the pageout daemon uses that lock to get the object.
1870 */
1871 vm_page_lockspin_queues();
1872
1873 internal_to_external = FALSE;
1874 external_to_internal = FALSE;
1875
1876 if (mem->local) {
1877 /*
1878 * it's much easier to get the vm_page_pageable_xxx accounting correct
1879 * if we first move the page to the active queue... it's going to end
1880 * up there anyway, and we don't do vm_page_rename's frequently enough
1881 * for this to matter.
1882 */
1883 vm_page_queues_remove(mem);
1884 vm_page_activate(mem);
1885 }
1886 if (mem->active || mem->inactive || mem->speculative) {
1887 if (mem->object->internal && !new_object->internal) {
1888 internal_to_external = TRUE;
1889 }
1890 if (!mem->object->internal && new_object->internal) {
1891 external_to_internal = TRUE;
1892 }
1893 }
1894
1895 tag = mem->object->wire_tag;
1896 vm_page_remove(mem, TRUE);
1897 vm_page_insert_internal(mem, new_object, new_offset, tag, TRUE, TRUE, FALSE, FALSE, NULL);
1898
1899 if (internal_to_external) {
1900 vm_page_pageable_internal_count--;
1901 vm_page_pageable_external_count++;
1902 } else if (external_to_internal) {
1903 vm_page_pageable_external_count--;
1904 vm_page_pageable_internal_count++;
1905 }
1906
1907 vm_page_unlock_queues();
1908 }
1909
1910 /*
1911 * vm_page_init:
1912 *
1913 * Initialize the fields in a new page.
1914 * This takes a structure with random values and initializes it
1915 * so that it can be given to vm_page_release or vm_page_insert.
1916 */
1917 void
1918 vm_page_init(
1919 vm_page_t mem,
1920 ppnum_t phys_page,
1921 boolean_t lopage)
1922 {
1923 assert(phys_page);
1924
1925 #if DEBUG
1926 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1927 if (!(pmap_valid_page(phys_page))) {
1928 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1929 }
1930 }
1931 #endif
1932 *mem = vm_page_template;
1933 mem->phys_page = phys_page;
1934 #if 0
1935 /*
1936 * we're leaving this turned off for now... currently pages
1937 * come off the free list and are either immediately dirtied/referenced
1938 * due to zero-fill or COW faults, or are used to read or write files...
1939 * in the file I/O case, the UPL mechanism takes care of clearing
1940 * the state of the HW ref/mod bits in a somewhat fragile way.
1941 * Since we may change the way this works in the future (to toughen it up),
1942 * I'm leaving this as a reminder of where these bits could get cleared
1943 */
1944
1945 /*
1946 * make sure both the h/w referenced and modified bits are
1947 * clear at this point... we are especially dependent on
1948 * not finding a 'stale' h/w modified in a number of spots
1949 * once this page goes back into use
1950 */
1951 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1952 #endif
1953 mem->lopage = lopage;
1954 }
1955
1956 /*
1957 * vm_page_grab_fictitious:
1958 *
1959 * Remove a fictitious page from the free list.
1960 * Returns VM_PAGE_NULL if there are no free pages.
1961 */
1962 int c_vm_page_grab_fictitious = 0;
1963 int c_vm_page_grab_fictitious_failed = 0;
1964 int c_vm_page_release_fictitious = 0;
1965 int c_vm_page_more_fictitious = 0;
1966
1967 vm_page_t
1968 vm_page_grab_fictitious_common(
1969 ppnum_t phys_addr)
1970 {
1971 vm_page_t m;
1972
1973 if ((m = (vm_page_t)zget(vm_page_zone))) {
1974
1975 vm_page_init(m, phys_addr, FALSE);
1976 m->fictitious = TRUE;
1977
1978 c_vm_page_grab_fictitious++;
1979 } else
1980 c_vm_page_grab_fictitious_failed++;
1981
1982 return m;
1983 }
1984
1985 vm_page_t
1986 vm_page_grab_fictitious(void)
1987 {
1988 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1989 }
1990
1991 vm_page_t
1992 vm_page_grab_guard(void)
1993 {
1994 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1995 }
1996
1997
1998 /*
1999 * vm_page_release_fictitious:
2000 *
2001 * Release a fictitious page to the zone pool
2002 */
2003 void
2004 vm_page_release_fictitious(
2005 vm_page_t m)
2006 {
2007 assert(!m->free);
2008 assert(m->fictitious);
2009 assert(m->phys_page == vm_page_fictitious_addr ||
2010 m->phys_page == vm_page_guard_addr);
2011
2012 c_vm_page_release_fictitious++;
2013
2014 zfree(vm_page_zone, m);
2015 }
2016
2017 /*
2018 * vm_page_more_fictitious:
2019 *
2020 * Add more fictitious pages to the zone.
2021 * Allowed to block. This routine is intimately tied to
2022 * the zones code, for several reasons:
2023 * 1. we need to carve some page structures out of physical
2024 * memory before zones work, so they _cannot_ come from
2025 * the zone_map.
2026 * 2. the zone needs to be collectable in order to prevent
2027 * growth without bound. These structures are used by
2028 * the device pager (by the hundreds and thousands), as
2029 * private pages for pageout, and as blocking pages for
2030 * pagein. Temporary bursts in demand should not result in
2031 * permanent allocation of a resource.
2032 * 3. To smooth allocation humps, we allocate single pages
2033 * with kernel_memory_allocate(), and cram them into the
2034 * zone.
2035 */
2036
2037 void vm_page_more_fictitious(void)
2038 {
2039 vm_offset_t addr;
2040 kern_return_t retval;
2041
2042 c_vm_page_more_fictitious++;
2043
2044 /*
2045 * Allocate a single page from the zone_map. Do not wait if no physical
2046 * pages are immediately available, and do not zero the space. We need
2047 * our own blocking lock here to prevent multiple simultaneous
2048 * requests from piling up on the zone_map lock. Exactly
2049 * one (of our) threads should be potentially waiting on the map lock.
2050 * If the winner is not vm-privileged, then the page allocation will fail,
2051 * and it will temporarily block here in vm_page_wait().
2052 */
2053 lck_mtx_lock(&vm_page_alloc_lock);
2054 /*
2055 * If another thread allocated space, just bail out now.
2056 */
2057 if (zone_free_count(vm_page_zone) > 5) {
2058 /*
2059 * The number "5" is a small number that is larger than the
2060 * number of fictitious pages that any single caller will
2061 * attempt to allocate. Otherwise, a thread will attempt to
2062 * acquire a fictitious page (vm_page_grab_fictitious), fail,
2063 * release all of the resources and locks already acquired,
2064 * and then call this routine. This routine finds the pages
2065 * that the caller released, so it fails to allocate new space.
2066 * The process would repeat indefinitely. The largest known number
2067 * of fictitious pages required in this manner is 2. 5 is
2068 * simply a somewhat larger number.
2069 */
2070 lck_mtx_unlock(&vm_page_alloc_lock);
2071 return;
2072 }
2073
2074 retval = kernel_memory_allocate(zone_map,
2075 &addr, PAGE_SIZE, VM_PROT_ALL,
2076 KMA_KOBJECT|KMA_NOPAGEWAIT, VM_KERN_MEMORY_ZONE);
2077 if (retval != KERN_SUCCESS) {
2078 /*
2079 * No page was available. Drop the
2080 * lock to give another thread a chance at it, and
2081 * wait for the pageout daemon to make progress.
2082 */
2083 lck_mtx_unlock(&vm_page_alloc_lock);
2084 vm_page_wait(THREAD_UNINT);
2085 return;
2086 }
2087
2088 zcram(vm_page_zone, addr, PAGE_SIZE);
2089
2090 lck_mtx_unlock(&vm_page_alloc_lock);
2091 }
2092
2093
2094 /*
2095 * vm_pool_low():
2096 *
2097 * Return true if it is not likely that a non-vm_privileged thread
2098 * can get memory without blocking. Advisory only, since the
2099 * situation may change under us.
2100 */
2101 int
2102 vm_pool_low(void)
2103 {
2104 /* No locking, at worst we will fib. */
2105 return( vm_page_free_count <= vm_page_free_reserved );
2106 }
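/*
 * Illustrative, hypothetical usage sketch for vm_pool_low(): an
 * optional allocation path might consult it before dipping into the
 * free list. Because the check is unlocked and purely advisory, the
 * caller must still cope with vm_page_grab() returning NULL.
 *
 *	if (!vm_pool_low()) {
 *		vm_page_t m = vm_page_grab();
 *
 *		if (m != VM_PAGE_NULL) {
 *			... populate an optional cache with m ...
 *		}
 *	}
 */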
2107
2108
2109
2110 /*
2111 * this is an interface to support bring-up of drivers
2112 * on platforms with physical memory > 4G...
2113 */
2114 int vm_himemory_mode = 2;
2115
2116
2117 /*
2118 * this interface exists to support hardware controllers
2119 * incapable of generating DMAs with more than 32 bits
2120 * of address on platforms with physical memory > 4G...
2121 */
2122 unsigned int vm_lopages_allocated_q = 0;
2123 unsigned int vm_lopages_allocated_cpm_success = 0;
2124 unsigned int vm_lopages_allocated_cpm_failed = 0;
2125 queue_head_t vm_lopage_queue_free;
2126
2127 vm_page_t
2128 vm_page_grablo(void)
2129 {
2130 vm_page_t mem;
2131
2132 if (vm_lopage_needed == FALSE)
2133 return (vm_page_grab());
2134
2135 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2136
2137 if ( !queue_empty(&vm_lopage_queue_free)) {
2138 queue_remove_first(&vm_lopage_queue_free,
2139 mem,
2140 vm_page_t,
2141 pageq);
2142 assert(vm_lopage_free_count);
2143
2144 vm_lopage_free_count--;
2145 vm_lopages_allocated_q++;
2146
2147 if (vm_lopage_free_count < vm_lopage_lowater)
2148 vm_lopage_refill = TRUE;
2149
2150 lck_mtx_unlock(&vm_page_queue_free_lock);
2151 } else {
2152 lck_mtx_unlock(&vm_page_queue_free_lock);
2153
2154 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
2155
2156 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2157 vm_lopages_allocated_cpm_failed++;
2158 lck_mtx_unlock(&vm_page_queue_free_lock);
2159
2160 return (VM_PAGE_NULL);
2161 }
2162 mem->busy = TRUE;
2163
2164 vm_page_lockspin_queues();
2165
2166 mem->gobbled = FALSE;
2167 vm_page_gobble_count--;
2168 vm_page_wire_count--;
2169
2170 vm_lopages_allocated_cpm_success++;
2171 vm_page_unlock_queues();
2172 }
2173 assert(mem->busy);
2174 assert(!mem->free);
2175 assert(!mem->pmapped);
2176 assert(!mem->wpmapped);
2177 assert(!pmap_is_noencrypt(mem->phys_page));
2178
2179 mem->pageq.next = NULL;
2180 mem->pageq.prev = NULL;
2181
2182 return (mem);
2183 }
2184
2185
2186 /*
2187 * vm_page_grab:
2188 *
2189 * first try to grab a page from the per-cpu free list...
2190 * this must be done while preemption is disabled... if
2191 * a page is available, we're done...
2192 * if no page is available, grab the vm_page_queue_free_lock
2193 * and see if current number of free pages would allow us
2194 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2195 * if there are pages available, disable preemption and
2196 * recheck the state of the per-cpu free list... we could
2197 * have been preempted and moved to a different cpu, or
2198 * some other thread could have re-filled it... if still
2199 * empty, figure out how many pages we can steal from the
2200 * global free queue and move to the per-cpu queue...
2201 * return one of these pages when done... only wake up the
2202 * pageout_scan thread if we moved pages from the global
2203 * list... no need for the wakeup if we've satisfied the
2204 * request from the per-cpu queue.
2205 */
2206
2207
2208 vm_page_t
2209 vm_page_grab( void )
2210 {
2211 vm_page_t mem;
2212
2213
2214 disable_preemption();
2215
2216 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2217 return_page_from_cpu_list:
2218 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2219 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
2220
2221 enable_preemption();
2222 mem->pageq.next = NULL;
2223
2224 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2225 assert(mem->tabled == FALSE);
2226 assert(mem->object == VM_OBJECT_NULL);
2227 assert(!mem->laundry);
2228 assert(!mem->free);
2229 assert(pmap_verify_free(mem->phys_page));
2230 assert(mem->busy);
2231 assert(!mem->encrypted);
2232 assert(!mem->pmapped);
2233 assert(!mem->wpmapped);
2234 assert(!mem->active);
2235 assert(!mem->inactive);
2236 assert(!mem->throttled);
2237 assert(!mem->speculative);
2238 assert(!pmap_is_noencrypt(mem->phys_page));
2239
2240 return mem;
2241 }
2242 enable_preemption();
2243
2244
2245 /*
2246 * Optionally produce warnings if the wire or gobble
2247 * counts exceed some threshold.
2248 */
2249 #if VM_PAGE_WIRE_COUNT_WARNING
2250 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2251 printf("mk: vm_page_grab(): high wired page count of %d\n",
2252 vm_page_wire_count);
2253 }
2254 #endif
2255 #if VM_PAGE_GOBBLE_COUNT_WARNING
2256 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2257 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2258 vm_page_gobble_count);
2259 }
2260 #endif
2261 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2262
2263 /*
2264 * Only let privileged threads (involved in pageout)
2265 * dip into the reserved pool.
2266 */
2267 if ((vm_page_free_count < vm_page_free_reserved) &&
2268 !(current_thread()->options & TH_OPT_VMPRIV)) {
2269 lck_mtx_unlock(&vm_page_queue_free_lock);
2270 mem = VM_PAGE_NULL;
2271 }
2272 else {
2273 vm_page_t head;
2274 vm_page_t tail;
2275 unsigned int pages_to_steal;
2276 unsigned int color;
2277
2278 while ( vm_page_free_count == 0 ) {
2279
2280 lck_mtx_unlock(&vm_page_queue_free_lock);
2281 /*
2282 * must be a privileged thread to be
2283 * in this state since a non-privileged
2284 * thread would have bailed if we were
2285 * under the vm_page_free_reserved mark
2286 */
2287 VM_PAGE_WAIT();
2288 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2289 }
2290
2291 disable_preemption();
2292
2293 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2294 lck_mtx_unlock(&vm_page_queue_free_lock);
2295
2296 /*
2297 * we got preempted and moved to another processor
2298 * or we got preempted and someone else ran and filled the cache
2299 */
2300 goto return_page_from_cpu_list;
2301 }
2302 if (vm_page_free_count <= vm_page_free_reserved)
2303 pages_to_steal = 1;
2304 else {
2305 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2306 pages_to_steal = vm_free_magazine_refill_limit;
2307 else
2308 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2309 }
2310 color = PROCESSOR_DATA(current_processor(), start_color);
2311 head = tail = NULL;
2312
2313 vm_page_free_count -= pages_to_steal;
2314
2315 while (pages_to_steal--) {
2316
2317 while (queue_empty(&vm_page_queue_free[color]))
2318 color = (color + 1) & vm_color_mask;
2319
2320 queue_remove_first(&vm_page_queue_free[color],
2321 mem,
2322 vm_page_t,
2323 pageq);
2324 mem->pageq.next = NULL;
2325 mem->pageq.prev = NULL;
2326
2327 assert(!mem->active);
2328 assert(!mem->inactive);
2329 assert(!mem->throttled);
2330 assert(!mem->speculative);
2331
2332 color = (color + 1) & vm_color_mask;
2333
2334 if (head == NULL)
2335 head = mem;
2336 else
2337 tail->pageq.next = (queue_t)mem;
2338 tail = mem;
2339
2340 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2341 assert(mem->tabled == FALSE);
2342 assert(mem->object == VM_OBJECT_NULL);
2343 assert(!mem->laundry);
2344 assert(mem->free);
2345 mem->free = FALSE;
2346
2347 assert(pmap_verify_free(mem->phys_page));
2348 assert(mem->busy);
2349 assert(!mem->free);
2350 assert(!mem->encrypted);
2351 assert(!mem->pmapped);
2352 assert(!mem->wpmapped);
2353 assert(!pmap_is_noencrypt(mem->phys_page));
2354 }
2355 lck_mtx_unlock(&vm_page_queue_free_lock);
2356
2357 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2358 PROCESSOR_DATA(current_processor(), start_color) = color;
2359
2360 /*
2361 * satisfy this request
2362 */
2363 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2364 mem = head;
2365 mem->pageq.next = NULL;
2366
2367 enable_preemption();
2368 }
2369 /*
2370 * Decide if we should poke the pageout daemon.
2371 * We do this if the free count is less than the low
2372 * water mark, or if the free count is less than the high
2373 * water mark (but above the low water mark) and the inactive
2374 * count is less than its target.
2375 *
2376 * We don't have the counts locked ... if they change a little,
2377 * it doesn't really matter.
2378 */
2379 if ((vm_page_free_count < vm_page_free_min) ||
2380 ((vm_page_free_count < vm_page_free_target) &&
2381 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2382 thread_wakeup((event_t) &vm_page_free_wanted);
2383
2384 VM_CHECK_MEMORYSTATUS;
2385
2386 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2387
2388 return mem;
2389 }
2390
2391 /*
2392 * vm_page_release:
2393 *
2394 * Return a page to the free list.
2395 */
2396
2397 void
2398 vm_page_release(
2399 register vm_page_t mem)
2400 {
2401 unsigned int color;
2402 int need_wakeup = 0;
2403 int need_priv_wakeup = 0;
2404
2405
2406 assert(!mem->private && !mem->fictitious);
2407 if (vm_page_free_verify) {
2408 assert(pmap_verify_free(mem->phys_page));
2409 }
2410 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2411
2412 pmap_clear_noencrypt(mem->phys_page);
2413
2414 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2415 #if DEBUG
2416 if (mem->free)
2417 panic("vm_page_release");
2418 #endif
2419
2420 assert(mem->busy);
2421 assert(!mem->laundry);
2422 assert(mem->object == VM_OBJECT_NULL);
2423 assert(mem->pageq.next == NULL &&
2424 mem->pageq.prev == NULL);
2425 assert(mem->listq.next == NULL &&
2426 mem->listq.prev == NULL);
2427
2428 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2429 vm_lopage_free_count < vm_lopage_free_limit &&
2430 mem->phys_page < max_valid_low_ppnum) {
2431 /*
2432 * this exists to support hardware controllers
2433 * incapable of generating DMAs with more than 32 bits
2434 * of address on platforms with physical memory > 4G...
2435 */
2436 queue_enter_first(&vm_lopage_queue_free,
2437 mem,
2438 vm_page_t,
2439 pageq);
2440 vm_lopage_free_count++;
2441
2442 if (vm_lopage_free_count >= vm_lopage_free_limit)
2443 vm_lopage_refill = FALSE;
2444
2445 mem->lopage = TRUE;
2446 } else {
2447 mem->lopage = FALSE;
2448 mem->free = TRUE;
2449
2450 color = mem->phys_page & vm_color_mask;
2451 queue_enter_first(&vm_page_queue_free[color],
2452 mem,
2453 vm_page_t,
2454 pageq);
2455 vm_page_free_count++;
2456 /*
2457 * Check if we should wake up someone waiting for a page.
2458 * But don't bother waking them unless they can allocate.
2459 *
2460 * We wakeup only one thread, to prevent starvation.
2461 * Because the scheduling system handles wait queues FIFO,
2462 * if we wakeup all waiting threads, one greedy thread
2463 * can starve multiple niceguy threads. When the threads
2464 * all wake up, the greedy thread runs first, grabs the page,
2465 * and waits for another page. It will be the first to run
2466 * when the next page is freed.
2467 *
2468 * However, there is a slight danger here.
2469 * The thread we wake might not use the free page.
2470 * Then the other threads could wait indefinitely
2471 * while the page goes unused. To forestall this,
2472 * the pageout daemon will keep making free pages
2473 * as long as vm_page_free_wanted is non-zero.
2474 */
2475
2476 assert(vm_page_free_count > 0);
2477 if (vm_page_free_wanted_privileged > 0) {
2478 vm_page_free_wanted_privileged--;
2479 need_priv_wakeup = 1;
2480 } else if (vm_page_free_wanted > 0 &&
2481 vm_page_free_count > vm_page_free_reserved) {
2482 vm_page_free_wanted--;
2483 need_wakeup = 1;
2484 }
2485 }
2486 lck_mtx_unlock(&vm_page_queue_free_lock);
2487
2488 if (need_priv_wakeup)
2489 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2490 else if (need_wakeup)
2491 thread_wakeup_one((event_t) &vm_page_free_count);
2492
2493 VM_CHECK_MEMORYSTATUS;
2494 }
2495
2496 /*
2497 * This version of vm_page_release() is used only at startup
2498 * when we are single-threaded and pages are being released
2499 * for the first time. Hence, locking and other unnecessary checks are skipped.
2500 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
2501 */
2502 void
2503 vm_page_release_startup(
2504 register vm_page_t mem)
2505 {
2506 queue_t queue_free;
2507
2508 if (vm_lopage_free_count < vm_lopage_free_limit &&
2509 mem->phys_page < max_valid_low_ppnum) {
2510 mem->lopage = TRUE;
2511 vm_lopage_free_count++;
2512 queue_free = &vm_lopage_queue_free;
2513 } else {
2514 mem->lopage = FALSE;
2515 mem->free = TRUE;
2516 vm_page_free_count++;
2517 queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
2518 }
2519 queue_enter_first(queue_free, mem, vm_page_t, pageq);
2520 }
2521
2522 /*
2523 * vm_page_wait:
2524 *
2525 * Wait for a page to become available.
2526 * If there are plenty of free pages, then we don't sleep.
2527 *
2528 * Returns:
2529 * TRUE: There may be another page, try again
2530 * FALSE: We were interrupted out of our wait, don't try again
2531 */
2532
2533 boolean_t
2534 vm_page_wait(
2535 int interruptible )
2536 {
2537 /*
2538 * We can't use vm_page_free_reserved to make this
2539 * determination. Consider: some thread might
2540 * need to allocate two pages. The first allocation
2541 * succeeds, the second fails. After the first page is freed,
2542 * a call to vm_page_wait must really block.
2543 */
2544 kern_return_t wait_result;
2545 int need_wakeup = 0;
2546 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2547
2548 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2549
2550 if (is_privileged && vm_page_free_count) {
2551 lck_mtx_unlock(&vm_page_queue_free_lock);
2552 return TRUE;
2553 }
2554 if (vm_page_free_count < vm_page_free_target) {
2555
2556 if (is_privileged) {
2557 if (vm_page_free_wanted_privileged++ == 0)
2558 need_wakeup = 1;
2559 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2560 } else {
2561 if (vm_page_free_wanted++ == 0)
2562 need_wakeup = 1;
2563 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2564 }
2565 lck_mtx_unlock(&vm_page_queue_free_lock);
2566 counter(c_vm_page_wait_block++);
2567
2568 if (need_wakeup)
2569 thread_wakeup((event_t)&vm_page_free_wanted);
2570
2571 if (wait_result == THREAD_WAITING) {
2572 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2573 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
2574 wait_result = thread_block(THREAD_CONTINUE_NULL);
2575 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2576 }
2577
2578 return(wait_result == THREAD_AWAKENED);
2579 } else {
2580 lck_mtx_unlock(&vm_page_queue_free_lock);
2581 return TRUE;
2582 }
2583 }
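/*
 * A minimal, hypothetical sketch of the grab/wait protocol implied by
 * the TRUE/FALSE return above: retry vm_page_grab() as long as
 * vm_page_wait() says another attempt is worthwhile, and stop if the
 * wait was interrupted (KERN_ABORTED below is just a placeholder
 * error for the interrupted case).
 *
 *	vm_page_t m;
 *
 *	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
 *		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
 *			return KERN_ABORTED;
 *	}
 */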
2584
2585 /*
2586 * vm_page_alloc:
2587 *
2588 * Allocate and return a memory cell associated
2589 * with this VM object/offset pair.
2590 *
2591 * Object must be locked.
2592 */
2593
2594 vm_page_t
2595 vm_page_alloc(
2596 vm_object_t object,
2597 vm_object_offset_t offset)
2598 {
2599 register vm_page_t mem;
2600
2601 vm_object_lock_assert_exclusive(object);
2602 mem = vm_page_grab();
2603 if (mem == VM_PAGE_NULL)
2604 return VM_PAGE_NULL;
2605
2606 vm_page_insert(mem, object, offset);
2607
2608 return(mem);
2609 }
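/*
 * Hypothetical caller sketch for vm_page_alloc(): the object must be
 * held exclusively across the call, and a NULL return is typically
 * handled by dropping the lock, waiting for memory, and retrying.
 * The names 'object', 'offset' and 'm' below are placeholders.
 *
 *	vm_object_lock(object);
 *	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		VM_PAGE_WAIT();
 *		vm_object_lock(object);
 *	}
 *	... m is returned busy and already entered at <object, offset> ...
 *	vm_object_unlock(object);
 */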
2610
2611 /*
2612 * vm_page_alloc_guard:
2613 *
2614 * Allocate a fictitious page which will be used
2615 * as a guard page. The page will be inserted into
2616 * the object and returned to the caller.
2617 */
2618
2619 vm_page_t
2620 vm_page_alloc_guard(
2621 vm_object_t object,
2622 vm_object_offset_t offset)
2623 {
2624 register vm_page_t mem;
2625
2626 vm_object_lock_assert_exclusive(object);
2627 mem = vm_page_grab_guard();
2628 if (mem == VM_PAGE_NULL)
2629 return VM_PAGE_NULL;
2630
2631 vm_page_insert(mem, object, offset);
2632
2633 return(mem);
2634 }
2635
2636
2637 counter(unsigned int c_laundry_pages_freed = 0;)
2638
2639 /*
2640 * vm_page_free_prepare:
2641 *
2642 * Removes page from any queue it may be on
2643 * and disassociates it from its VM object.
2644 *
2645 * Object and page queues must be locked prior to entry.
2646 */
2647 static void
2648 vm_page_free_prepare(
2649 vm_page_t mem)
2650 {
2651 vm_page_free_prepare_queues(mem);
2652 vm_page_free_prepare_object(mem, TRUE);
2653 }
2654
2655
2656 void
2657 vm_page_free_prepare_queues(
2658 vm_page_t mem)
2659 {
2660 VM_PAGE_CHECK(mem);
2661 assert(!mem->free);
2662 assert(!mem->cleaning);
2663
2664 #if MACH_ASSERT || DEBUG
2665 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2666 if (mem->free)
2667 panic("vm_page_free: freeing page on free list\n");
2668 #endif /* MACH_ASSERT || DEBUG */
2669 if (mem->object) {
2670 vm_object_lock_assert_exclusive(mem->object);
2671 }
2672 if (mem->laundry) {
2673 /*
2674 * We may have to free a page while it's being laundered
2675 * if we lost its pager (due to a forced unmount, for example).
2676 * We need to call vm_pageout_steal_laundry() before removing
2677 * the page from its VM object, so that we can remove it
2678 * from its pageout queue and adjust the laundry accounting
2679 */
2680 vm_pageout_steal_laundry(mem, TRUE);
2681 counter(++c_laundry_pages_freed);
2682 }
2683
2684 vm_page_queues_remove(mem); /* clears local/active/inactive/throttled/speculative */
2685
2686 if (VM_PAGE_WIRED(mem)) {
2687 if (mem->object) {
2688 assert(mem->object->wired_page_count > 0);
2689 mem->object->wired_page_count--;
2690 if (!mem->object->wired_page_count) {
2691 VM_OBJECT_UNWIRED(mem->object);
2692 }
2693
2694 assert(mem->object->resident_page_count >=
2695 mem->object->wired_page_count);
2696
2697 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2698 OSAddAtomic(+1, &vm_page_purgeable_count);
2699 assert(vm_page_purgeable_wired_count > 0);
2700 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2701 }
2702 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2703 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2704 mem->object->vo_purgeable_owner != TASK_NULL) {
2705 task_t owner;
2706
2707 owner = mem->object->vo_purgeable_owner;
2708 /*
2709 * While wired, this page was accounted
2710 * as "non-volatile" but it should now
2711 * be accounted as "volatile".
2712 */
2713 /* one less "non-volatile"... */
2714 ledger_debit(owner->ledger,
2715 task_ledgers.purgeable_nonvolatile,
2716 PAGE_SIZE);
2717 /* ... and "phys_footprint" */
2718 ledger_debit(owner->ledger,
2719 task_ledgers.phys_footprint,
2720 PAGE_SIZE);
2721 /* one more "volatile" */
2722 ledger_credit(owner->ledger,
2723 task_ledgers.purgeable_volatile,
2724 PAGE_SIZE);
2725 }
2726 }
2727 if (!mem->private && !mem->fictitious)
2728 vm_page_wire_count--;
2729 mem->wire_count = 0;
2730 assert(!mem->gobbled);
2731 } else if (mem->gobbled) {
2732 if (!mem->private && !mem->fictitious)
2733 vm_page_wire_count--;
2734 vm_page_gobble_count--;
2735 }
2736 }
2737
2738
2739 void
2740 vm_page_free_prepare_object(
2741 vm_page_t mem,
2742 boolean_t remove_from_hash)
2743 {
2744 if (mem->tabled)
2745 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2746
2747 PAGE_WAKEUP(mem); /* clears wanted */
2748
2749 if (mem->private) {
2750 mem->private = FALSE;
2751 mem->fictitious = TRUE;
2752 mem->phys_page = vm_page_fictitious_addr;
2753 }
2754 if ( !mem->fictitious) {
2755 vm_page_init(mem, mem->phys_page, mem->lopage);
2756 }
2757 }
2758
2759
2760 /*
2761 * vm_page_free:
2762 *
2763 * Returns the given page to the free list,
2764 * disassociating it from any VM object.
2765 *
2766 * Object and page queues must be locked prior to entry.
2767 */
2768 void
2769 vm_page_free(
2770 vm_page_t mem)
2771 {
2772 vm_page_free_prepare(mem);
2773
2774 if (mem->fictitious) {
2775 vm_page_release_fictitious(mem);
2776 } else {
2777 vm_page_release(mem);
2778 }
2779 }
2780
2781
2782 void
2783 vm_page_free_unlocked(
2784 vm_page_t mem,
2785 boolean_t remove_from_hash)
2786 {
2787 vm_page_lockspin_queues();
2788 vm_page_free_prepare_queues(mem);
2789 vm_page_unlock_queues();
2790
2791 vm_page_free_prepare_object(mem, remove_from_hash);
2792
2793 if (mem->fictitious) {
2794 vm_page_release_fictitious(mem);
2795 } else {
2796 vm_page_release(mem);
2797 }
2798 }
2799
2800
2801 /*
2802 * Free a list of pages. The list can be up to several hundred pages,
2803 * as blocked up by vm_pageout_scan().
2804 * The big win is not having to take the free list lock once
2805 * per page.
2806 */
2807 void
2808 vm_page_free_list(
2809 vm_page_t freeq,
2810 boolean_t prepare_object)
2811 {
2812 vm_page_t mem;
2813 vm_page_t nxt;
2814 vm_page_t local_freeq;
2815 int pg_count;
2816
2817 while (freeq) {
2818
2819 pg_count = 0;
2820 local_freeq = VM_PAGE_NULL;
2821 mem = freeq;
2822
2823 /*
2824 * break up the processing into smaller chunks so
2825 * that we can 'pipeline' the pages onto the
2826 * free list w/o introducing too much
2827 * contention on the global free queue lock
2828 */
2829 while (mem && pg_count < 64) {
2830
2831 assert(!mem->inactive);
2832 assert(!mem->active);
2833 assert(!mem->throttled);
2834 assert(!mem->free);
2835 assert(!mem->speculative);
2836 assert(!VM_PAGE_WIRED(mem));
2837 assert(mem->pageq.prev == NULL);
2838
2839 nxt = (vm_page_t)(mem->pageq.next);
2840
2841 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2842 assert(pmap_verify_free(mem->phys_page));
2843 }
2844 if (prepare_object == TRUE)
2845 vm_page_free_prepare_object(mem, TRUE);
2846
2847 if (!mem->fictitious) {
2848 assert(mem->busy);
2849
2850 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2851 vm_lopage_free_count < vm_lopage_free_limit &&
2852 mem->phys_page < max_valid_low_ppnum) {
2853 mem->pageq.next = NULL;
2854 vm_page_release(mem);
2855 } else {
2856 /*
2857 * IMPORTANT: we can't set the page "free" here
2858 * because that would make the page eligible for
2859 * a physically-contiguous allocation (see
2860 * vm_page_find_contiguous()) right away (we don't
2861 * hold the vm_page_queue_free lock). That would
2862 * cause trouble because the page is not actually
2863 * in the free queue yet...
2864 */
2865 mem->pageq.next = (queue_entry_t)local_freeq;
2866 local_freeq = mem;
2867 pg_count++;
2868
2869 pmap_clear_noencrypt(mem->phys_page);
2870 }
2871 } else {
2872 assert(mem->phys_page == vm_page_fictitious_addr ||
2873 mem->phys_page == vm_page_guard_addr);
2874 vm_page_release_fictitious(mem);
2875 }
2876 mem = nxt;
2877 }
2878 freeq = mem;
2879
2880 if ( (mem = local_freeq) ) {
2881 unsigned int avail_free_count;
2882 unsigned int need_wakeup = 0;
2883 unsigned int need_priv_wakeup = 0;
2884
2885 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2886
2887 while (mem) {
2888 int color;
2889
2890 nxt = (vm_page_t)(mem->pageq.next);
2891
2892 assert(!mem->free);
2893 assert(mem->busy);
2894 mem->free = TRUE;
2895
2896 color = mem->phys_page & vm_color_mask;
2897 queue_enter_first(&vm_page_queue_free[color],
2898 mem,
2899 vm_page_t,
2900 pageq);
2901 mem = nxt;
2902 }
2903 vm_page_free_count += pg_count;
2904 avail_free_count = vm_page_free_count;
2905
2906 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2907
2908 if (avail_free_count < vm_page_free_wanted_privileged) {
2909 need_priv_wakeup = avail_free_count;
2910 vm_page_free_wanted_privileged -= avail_free_count;
2911 avail_free_count = 0;
2912 } else {
2913 need_priv_wakeup = vm_page_free_wanted_privileged;
2914 vm_page_free_wanted_privileged = 0;
2915 avail_free_count -= need_priv_wakeup; /* subtract the waiters just satisfied */
2916 }
2917 }
2918 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2919 unsigned int available_pages;
2920
2921 available_pages = avail_free_count - vm_page_free_reserved;
2922
2923 if (available_pages >= vm_page_free_wanted) {
2924 need_wakeup = vm_page_free_wanted;
2925 vm_page_free_wanted = 0;
2926 } else {
2927 need_wakeup = available_pages;
2928 vm_page_free_wanted -= available_pages;
2929 }
2930 }
2931 lck_mtx_unlock(&vm_page_queue_free_lock);
2932
2933 if (need_priv_wakeup != 0) {
2934 /*
2935 * There shouldn't be that many VM-privileged threads,
2936 * so let's wake them all up, even if we don't quite
2937 * have enough pages to satisfy them all.
2938 */
2939 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2940 }
2941 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2942 /*
2943 * We don't expect to have any more waiters
2944 * after this, so let's wake them all up at
2945 * once.
2946 */
2947 thread_wakeup((event_t) &vm_page_free_count);
2948 } else for (; need_wakeup != 0; need_wakeup--) {
2949 /*
2950 * Wake up one waiter per page we just released.
2951 */
2952 thread_wakeup_one((event_t) &vm_page_free_count);
2953 }
2954
2955 VM_CHECK_MEMORYSTATUS;
2956 }
2957 }
2958 }
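/*
 * Hypothetical batching sketch for vm_page_free_list(): callers build
 * a singly linked chain through pageq.next (pageq.prev stays NULL) and
 * hand over the head, so the free-queue lock is taken once per chunk
 * of up to 64 pages rather than once per page.
 *
 *	vm_page_t local_freeq = VM_PAGE_NULL;
 *
 *	for (each page m being freed) {
 *		m->pageq.next = (queue_entry_t) local_freeq;
 *		local_freeq = m;
 *	}
 *	if (local_freeq != VM_PAGE_NULL)
 *		vm_page_free_list(local_freeq, TRUE);
 */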
2959
2960
2961 /*
2962 * vm_page_wire:
2963 *
2964 * Mark this page as wired down by yet
2965 * another map, removing it from paging queues
2966 * as necessary.
2967 *
2968 * The page's object and the page queues must be locked.
2969 */
2970
2971
2972 void
2973 vm_page_wire(
2974 register vm_page_t mem,
2975 vm_tag_t tag,
2976 boolean_t check_memorystatus)
2977 {
2978
2979 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2980
2981 VM_PAGE_CHECK(mem);
2982 if (mem->object) {
2983 vm_object_lock_assert_exclusive(mem->object);
2984 } else {
2985 /*
2986 * In theory, the page should be in an object before it
2987 * gets wired, since we need to hold the object lock
2988 * to update some fields in the page structure.
2989 * However, some code (i386 pmap, for example) might want
2990 * to wire a page before it gets inserted into an object.
2991 * That's somewhat OK, as long as nobody else can get to
2992 * that page and update it at the same time.
2993 */
2994 }
2995 #if DEBUG
2996 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2997 #endif
2998 if ( !VM_PAGE_WIRED(mem)) {
2999
3000 if (mem->pageout_queue) {
3001 mem->pageout = FALSE;
3002 vm_pageout_throttle_up(mem);
3003 }
3004 vm_page_queues_remove(mem);
3005
3006 if (mem->object) {
3007
3008 if (!mem->private && !mem->fictitious)
3009 {
3010 if (!mem->object->wired_page_count)
3011 {
3012 assert(VM_KERN_MEMORY_NONE != tag);
3013 mem->object->wire_tag = tag;
3014 VM_OBJECT_WIRED(mem->object);
3015 }
3016 }
3017 mem->object->wired_page_count++;
3018
3019 assert(mem->object->resident_page_count >=
3020 mem->object->wired_page_count);
3021 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
3022 assert(vm_page_purgeable_count > 0);
3023 OSAddAtomic(-1, &vm_page_purgeable_count);
3024 OSAddAtomic(1, &vm_page_purgeable_wired_count);
3025 }
3026 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
3027 mem->object->purgable == VM_PURGABLE_EMPTY) &&
3028 mem->object->vo_purgeable_owner != TASK_NULL) {
3029 task_t owner;
3030
3031 owner = mem->object->vo_purgeable_owner;
3032 /* less volatile bytes */
3033 ledger_debit(owner->ledger,
3034 task_ledgers.purgeable_volatile,
3035 PAGE_SIZE);
3036 /* more not-quite-volatile bytes */
3037 ledger_credit(owner->ledger,
3038 task_ledgers.purgeable_nonvolatile,
3039 PAGE_SIZE);
3040 /* more footprint */
3041 ledger_credit(owner->ledger,
3042 task_ledgers.phys_footprint,
3043 PAGE_SIZE);
3044 }
3045 if (mem->object->all_reusable) {
3046 /*
3047 * Wired pages are not counted as "re-usable"
3048 * in "all_reusable" VM objects, so nothing
3049 * to do here.
3050 */
3051 } else if (mem->reusable) {
3052 /*
3053 * This page is not "re-usable" when it's
3054 * wired, so adjust its state and the
3055 * accounting.
3056 */
3057 vm_object_reuse_pages(mem->object,
3058 mem->offset,
3059 mem->offset+PAGE_SIZE_64,
3060 FALSE);
3061 }
3062 }
3063 assert(!mem->reusable);
3064
3065 if (!mem->private && !mem->fictitious && !mem->gobbled)
3066 vm_page_wire_count++;
3067 if (mem->gobbled)
3068 vm_page_gobble_count--;
3069 mem->gobbled = FALSE;
3070
3071 if (check_memorystatus == TRUE) {
3072 VM_CHECK_MEMORYSTATUS;
3073 }
3074 /*
3075 * ENCRYPTED SWAP:
3076 * The page could be encrypted, but
3077 * we don't have to decrypt it here
3078 * because we don't guarantee that the
3079 * data is actually valid at this point.
3080 * The page will get decrypted in
3081 * vm_fault_wire() if needed.
3082 */
3083 }
3084 assert(!mem->gobbled);
3085 mem->wire_count++;
3086 VM_PAGE_CHECK(mem);
3087 }
3088
3089 /*
3090 * vm_page_unwire:
3091 *
3092 * Release one wiring of this page, potentially
3093 * enabling it to be paged again.
3094 *
3095 * The page's object and the page queues must be locked.
3096 */
3097 void
3098 vm_page_unwire(
3099 vm_page_t mem,
3100 boolean_t queueit)
3101 {
3102
3103 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
3104
3105 VM_PAGE_CHECK(mem);
3106 assert(VM_PAGE_WIRED(mem));
3107 assert(mem->object != VM_OBJECT_NULL);
3108 #if DEBUG
3109 vm_object_lock_assert_exclusive(mem->object);
3110 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3111 #endif
3112 if (--mem->wire_count == 0) {
3113 assert(!mem->private && !mem->fictitious);
3114 vm_page_wire_count--;
3115 assert(mem->object->wired_page_count > 0);
3116 mem->object->wired_page_count--;
3117 if (!mem->object->wired_page_count) {
3118 VM_OBJECT_UNWIRED(mem->object);
3119 }
3120 assert(mem->object->resident_page_count >=
3121 mem->object->wired_page_count);
3122 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
3123 OSAddAtomic(+1, &vm_page_purgeable_count);
3124 assert(vm_page_purgeable_wired_count > 0);
3125 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
3126 }
3127 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
3128 mem->object->purgable == VM_PURGABLE_EMPTY) &&
3129 mem->object->vo_purgeable_owner != TASK_NULL) {
3130 task_t owner;
3131
3132 owner = mem->object->vo_purgeable_owner;
3133 /* more volatile bytes */
3134 ledger_credit(owner->ledger,
3135 task_ledgers.purgeable_volatile,
3136 PAGE_SIZE);
3137 /* less not-quite-volatile bytes */
3138 ledger_debit(owner->ledger,
3139 task_ledgers.purgeable_nonvolatile,
3140 PAGE_SIZE);
3141 /* less footprint */
3142 ledger_debit(owner->ledger,
3143 task_ledgers.phys_footprint,
3144 PAGE_SIZE);
3145 }
3146 assert(mem->object != kernel_object);
3147 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
3148
3149 if (queueit == TRUE) {
3150 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
3151 vm_page_deactivate(mem);
3152 } else {
3153 vm_page_activate(mem);
3154 }
3155 }
3156
3157 VM_CHECK_MEMORYSTATUS;
3158
3159 }
3160 VM_PAGE_CHECK(mem);
3161 }
3162
3163 /*
3164 * vm_page_deactivate:
3165 *
3166 * Returns the given page to the inactive list,
3167 * indicating that no physical maps have access
3168 * to this page. [Used by the physical mapping system.]
3169 *
3170 * The page queues must be locked.
3171 */
3172 void
3173 vm_page_deactivate(
3174 vm_page_t m)
3175 {
3176 vm_page_deactivate_internal(m, TRUE);
3177 }
3178
3179
3180 void
3181 vm_page_deactivate_internal(
3182 vm_page_t m,
3183 boolean_t clear_hw_reference)
3184 {
3185
3186 VM_PAGE_CHECK(m);
3187 assert(m->object != kernel_object);
3188 assert(m->phys_page != vm_page_guard_addr);
3189
3190 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
3191 #if DEBUG
3192 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3193 #endif
3194 /*
3195 * This page is no longer very interesting. If it was
3196 * interesting (active or inactive/referenced), then we
3197 * clear the reference bit and (re)enter it in the
3198 * inactive queue. Note wired pages should not have
3199 * their reference bit cleared.
3200 */
3201 assert ( !(m->absent && !m->unusual));
3202
3203 if (m->gobbled) { /* can this happen? */
3204 assert( !VM_PAGE_WIRED(m));
3205
3206 if (!m->private && !m->fictitious)
3207 vm_page_wire_count--;
3208 vm_page_gobble_count--;
3209 m->gobbled = FALSE;
3210 }
3211 /*
3212 * if this page is currently on the pageout queue, we can't do the
3213 * vm_page_queues_remove (which doesn't handle the pageout queue case)
3214 * and we can't remove it manually since we would need the object lock
3215 * (which is not required here) to decrement the activity_in_progress
3216 * reference which is held on the object while the page is in the pageout queue...
3217 * just let the normal laundry processing proceed
3218 */
3219 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
3220 return;
3221
3222 if (!m->absent && clear_hw_reference == TRUE)
3223 pmap_clear_reference(m->phys_page);
3224
3225 m->reference = FALSE;
3226 m->no_cache = FALSE;
3227
3228 if (!m->inactive) {
3229 vm_page_queues_remove(m);
3230
3231 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3232 m->dirty && m->object->internal &&
3233 (m->object->purgable == VM_PURGABLE_DENY ||
3234 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3235 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3236 vm_page_check_pageable_safe(m);
3237 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3238 m->throttled = TRUE;
3239 vm_page_throttled_count++;
3240 } else {
3241 if (m->object->named && m->object->ref_count == 1) {
3242 vm_page_speculate(m, FALSE);
3243 #if DEVELOPMENT || DEBUG
3244 vm_page_speculative_recreated++;
3245 #endif
3246 } else {
3247 vm_page_enqueue_inactive(m, FALSE);
3248 }
3249 }
3250 }
3251 }
3252
3253 /*
3254 * vm_page_enqueue_cleaned
3255 *
3256 * Put the page on the cleaned queue, mark it cleaned, etc.
3257 * Being on the cleaned queue (and having m->clean_queue set)
3258 * does ** NOT ** guarantee that the page is clean!
3259 *
3260 * Call with the queues lock held.
3261 */
3262
3263 void vm_page_enqueue_cleaned(vm_page_t m)
3264 {
3265 assert(m->phys_page != vm_page_guard_addr);
3266 #if DEBUG
3267 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3268 #endif
3269 assert( !(m->absent && !m->unusual));
3270
3271 if (m->gobbled) {
3272 assert( !VM_PAGE_WIRED(m));
3273 if (!m->private && !m->fictitious)
3274 vm_page_wire_count--;
3275 vm_page_gobble_count--;
3276 m->gobbled = FALSE;
3277 }
3278 /*
3279 * if this page is currently on the pageout queue, we can't do the
3280 * vm_page_queues_remove (which doesn't handle the pageout queue case)
3281 * and we can't remove it manually since we would need the object lock
3282 * (which is not required here) to decrement the activity_in_progress
3283 * reference which is held on the object while the page is in the pageout queue...
3284 * just let the normal laundry processing proceed
3285 */
3286 if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
3287 return;
3288
3289 vm_page_queues_remove(m);
3290
3291 vm_page_check_pageable_safe(m);
3292 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
3293 m->clean_queue = TRUE;
3294 vm_page_cleaned_count++;
3295
3296 m->inactive = TRUE;
3297 vm_page_inactive_count++;
3298 if (m->object->internal) {
3299 vm_page_pageable_internal_count++;
3300 } else {
3301 vm_page_pageable_external_count++;
3302 }
3303
3304 vm_pageout_enqueued_cleaned++;
3305 }
3306
3307 /*
3308 * vm_page_activate:
3309 *
3310 * Put the specified page on the active list (if appropriate).
3311 *
3312 * The page queues must be locked.
3313 */
3314
3315 void
3316 vm_page_activate(
3317 register vm_page_t m)
3318 {
3319 VM_PAGE_CHECK(m);
3320 #ifdef FIXME_4778297
3321 assert(m->object != kernel_object);
3322 #endif
3323 assert(m->phys_page != vm_page_guard_addr);
3324 #if DEBUG
3325 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3326 #endif
3327 assert( !(m->absent && !m->unusual));
3328
3329 if (m->gobbled) {
3330 assert( !VM_PAGE_WIRED(m));
3331 if (!m->private && !m->fictitious)
3332 vm_page_wire_count--;
3333 vm_page_gobble_count--;
3334 m->gobbled = FALSE;
3335 }
3336 /*
3337 * if this page is currently on the pageout queue, we can't do the
3338 * vm_page_queues_remove (which doesn't handle the pageout queue case)
3339 * and we can't remove it manually since we would need the object lock
3340 * (which is not required here) to decrement the activity_in_progress
3341 * reference which is held on the object while the page is in the pageout queue...
3342 * just let the normal laundry processing proceed
3343 */
3344 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3345 return;
3346
3347 #if DEBUG
3348 if (m->active)
3349 panic("vm_page_activate: already active");
3350 #endif
3351
3352 if (m->speculative) {
3353 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
3354 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
3355 }
3356
3357 vm_page_queues_remove(m);
3358
3359 if ( !VM_PAGE_WIRED(m)) {
3360 vm_page_check_pageable_safe(m);
3361 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3362 m->dirty && m->object->internal &&
3363 (m->object->purgable == VM_PURGABLE_DENY ||
3364 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3365 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3366 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3367 m->throttled = TRUE;
3368 vm_page_throttled_count++;
3369 } else {
3370 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3371 m->active = TRUE;
3372 vm_page_active_count++;
3373 if (m->object->internal) {
3374 vm_page_pageable_internal_count++;
3375 } else {
3376 vm_page_pageable_external_count++;
3377 }
3378 }
3379 m->reference = TRUE;
3380 m->no_cache = FALSE;
3381 }
3382 VM_PAGE_CHECK(m);
3383 }
3384
3385
3386 /*
3387 * vm_page_speculate:
3388 *
3389 * Put the specified page on the speculative list (if appropriate).
3390 *
3391 * The page queues must be locked.
3392 */
3393 void
3394 vm_page_speculate(
3395 vm_page_t m,
3396 boolean_t new)
3397 {
3398 struct vm_speculative_age_q *aq;
3399
3400 VM_PAGE_CHECK(m);
3401 vm_page_check_pageable_safe(m);
3402
3403 assert(m->phys_page != vm_page_guard_addr);
3404 #if DEBUG
3405 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3406 #endif
3407 assert( !(m->absent && !m->unusual));
3408
3409 /*
3410 * if this page is currently on the pageout queue, we can't do the
3411 * vm_page_queues_remove (which doesn't handle the pageout queue case)
3412 * and we can't remove it manually since we would need the object lock
3413 * (which is not required here) to decrement the activity_in_progress
3414 * reference which is held on the object while the page is in the pageout queue...
3415 * just let the normal laundry processing proceed
3416 */
3417 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3418 return;
3419
3420 vm_page_queues_remove(m);
3421
3422 if ( !VM_PAGE_WIRED(m)) {
3423 mach_timespec_t ts;
3424 clock_sec_t sec;
3425 clock_nsec_t nsec;
3426
3427 clock_get_system_nanotime(&sec, &nsec);
3428 ts.tv_sec = (unsigned int) sec;
3429 ts.tv_nsec = nsec;
3430
3431 if (vm_page_speculative_count == 0) {
3432
3433 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3434 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3435
3436 aq = &vm_page_queue_speculative[speculative_age_index];
3437
3438 /*
3439 * set the timer to begin a new group
3440 */
3441 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3442 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
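/*
 * Worked example (value hypothetical): with
 * vm_page_speculative_q_age_ms == 500, the two lines above yield
 * age_ts = { 0 sec, 500000000 nsec }, so once 'ts' is added below
 * the new group ages out half a second from now.
 */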
3443
3444 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3445 } else {
3446 aq = &vm_page_queue_speculative[speculative_age_index];
3447
3448 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3449
3450 speculative_age_index++;
3451
3452 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3453 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3454 if (speculative_age_index == speculative_steal_index) {
3455 speculative_steal_index = speculative_age_index + 1;
3456
3457 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3458 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3459 }
3460 aq = &vm_page_queue_speculative[speculative_age_index];
3461
3462 if (!queue_empty(&aq->age_q))
3463 vm_page_speculate_ageit(aq);
3464
3465 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3466 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3467
3468 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3469 }
3470 }
3471 enqueue_tail(&aq->age_q, &m->pageq);
3472 m->speculative = TRUE;
3473 vm_page_speculative_count++;
3474 if (m->object->internal) {
3475 vm_page_pageable_internal_count++;
3476 } else {
3477 vm_page_pageable_external_count++;
3478 }
3479
3480 if (new == TRUE) {
3481 vm_object_lock_assert_exclusive(m->object);
3482
3483 m->object->pages_created++;
3484 #if DEVELOPMENT || DEBUG
3485 vm_page_speculative_created++;
3486 #endif
3487 }
3488 }
3489 VM_PAGE_CHECK(m);
3490 }
3491
3492
3493 /*
3494 * move pages from the specified aging bin to
3495 * the speculative bin that pageout_scan claims from
3496 *
3497 * The page queues must be locked.
3498 */
3499 void
3500 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3501 {
3502 struct vm_speculative_age_q *sq;
3503 vm_page_t t;
3504
3505 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3506
3507 if (queue_empty(&sq->age_q)) {
3508 sq->age_q.next = aq->age_q.next;
3509 sq->age_q.prev = aq->age_q.prev;
3510
3511 t = (vm_page_t)sq->age_q.next;
3512 t->pageq.prev = &sq->age_q;
3513
3514 t = (vm_page_t)sq->age_q.prev;
3515 t->pageq.next = &sq->age_q;
3516 } else {
3517 t = (vm_page_t)sq->age_q.prev;
3518 t->pageq.next = aq->age_q.next;
3519
3520 t = (vm_page_t)aq->age_q.next;
3521 t->pageq.prev = sq->age_q.prev;
3522
3523 t = (vm_page_t)aq->age_q.prev;
3524 t->pageq.next = &sq->age_q;
3525
3526 sq->age_q.prev = aq->age_q.prev;
3527 }
3528 queue_init(&aq->age_q);
3529 }
3530
3531
3532 void
3533 vm_page_lru(
3534 vm_page_t m)
3535 {
3536 VM_PAGE_CHECK(m);
3537 assert(m->object != kernel_object);
3538 assert(m->phys_page != vm_page_guard_addr);
3539
3540 #if DEBUG
3541 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3542 #endif
3543 /*
3544 * if this page is currently on the pageout queue, we can't do the
3545 * vm_page_queues_remove (which doesn't handle the pageout queue case)
3546 * and we can't remove it manually since we would need the object lock
3547 * (which is not required here) to decrement the activity_in_progress
3548 * reference which is held on the object while the page is in the pageout queue...
3549 * just let the normal laundry processing proceed
3550 */
3551 if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
3552 return;
3553
3554 m->no_cache = FALSE;
3555
3556 vm_page_queues_remove(m);
3557
3558 vm_page_enqueue_inactive(m, FALSE);
3559 }
3560
3561
3562 void
3563 vm_page_reactivate_all_throttled(void)
3564 {
3565 vm_page_t first_throttled, last_throttled;
3566 vm_page_t first_active;
3567 vm_page_t m;
3568 int extra_active_count;
3569 int extra_internal_count, extra_external_count;
3570
3571 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3572 return;
3573
3574 extra_active_count = 0;
3575 extra_internal_count = 0;
3576 extra_external_count = 0;
3577 vm_page_lock_queues();
3578 if (! queue_empty(&vm_page_queue_throttled)) {
3579 /*
3580 * Switch "throttled" pages to "active".
3581 */
3582 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3583 VM_PAGE_CHECK(m);
3584 assert(m->throttled);
3585 assert(!m->active);
3586 assert(!m->inactive);
3587 assert(!m->speculative);
3588 assert(!VM_PAGE_WIRED(m));
3589
3590 extra_active_count++;
3591 if (m->object->internal) {
3592 extra_internal_count++;
3593 } else {
3594 extra_external_count++;
3595 }
3596
3597 m->throttled = FALSE;
3598 m->active = TRUE;
3599 VM_PAGE_CHECK(m);
3600 }
3601
3602 /*
3603 * Transfer the entire throttled queue to the regular LRU page queues.
3604 * We insert it at the head of the active queue, so that these pages
3605 * get re-evaluated by the LRU algorithm first, since they've been
3606 * completely out of it until now.
3607 */
3608 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3609 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3610 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3611 if (queue_empty(&vm_page_queue_active)) {
3612 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3613 } else {
3614 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3615 }
3616 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3617 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3618 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3619
3620 #if DEBUG
3621 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3622 #endif
3623 queue_init(&vm_page_queue_throttled);
3624 /*
3625 * Adjust the global page counts.
3626 */
3627 vm_page_active_count += extra_active_count;
3628 vm_page_pageable_internal_count += extra_internal_count;
3629 vm_page_pageable_external_count += extra_external_count;
3630 vm_page_throttled_count = 0;
3631 }
3632 assert(vm_page_throttled_count == 0);
3633 assert(queue_empty(&vm_page_queue_throttled));
3634 vm_page_unlock_queues();
3635 }
3636
3637
3638 /*
3639 * move pages from the indicated local queue to the global active queue
3640 * it's OK to fail if we're below the hard limit and force == FALSE
3641 * the nolocks == TRUE case is to allow this function to be run on
3642 * the hibernate path
3643 */
3644
3645 void
3646 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3647 {
3648 struct vpl *lq;
3649 vm_page_t first_local, last_local;
3650 vm_page_t first_active;
3651 vm_page_t m;
3652 uint32_t count = 0;
3653
3654 if (vm_page_local_q == NULL)
3655 return;
3656
3657 lq = &vm_page_local_q[lid].vpl_un.vpl;
3658
3659 if (nolocks == FALSE) {
3660 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3661 if ( !vm_page_trylockspin_queues())
3662 return;
3663 } else
3664 vm_page_lockspin_queues();
3665
3666 VPL_LOCK(&lq->vpl_lock);
3667 }
3668 if (lq->vpl_count) {
3669 /*
3670 * Switch "local" pages to "active".
3671 */
3672 assert(!queue_empty(&lq->vpl_queue));
3673
3674 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3675 VM_PAGE_CHECK(m);
3676 vm_page_check_pageable_safe(m);
3677 assert(m->local);
3678 assert(!m->active);
3679 assert(!m->inactive);
3680 assert(!m->speculative);
3681 assert(!VM_PAGE_WIRED(m));
3682 assert(!m->throttled);
3683 assert(!m->fictitious);
3684
3685 if (m->local_id != lid)
3686 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3687
3688 m->local_id = 0;
3689 m->local = FALSE;
3690 m->active = TRUE;
3691 VM_PAGE_CHECK(m);
3692
3693 count++;
3694 }
3695 if (count != lq->vpl_count)
3696 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3697
3698 /*
3699 * Transfer the entire local queue to the regular LRU page queues.
3700 */
3701 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3702 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3703 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3704
3705 if (queue_empty(&vm_page_queue_active)) {
3706 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3707 } else {
3708 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3709 }
3710 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3711 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3712 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3713
3714 queue_init(&lq->vpl_queue);
3715 /*
3716 * Adjust the global page counts.
3717 */
3718 vm_page_active_count += lq->vpl_count;
3719 vm_page_pageable_internal_count += lq->vpl_internal_count;
3720 vm_page_pageable_external_count += lq->vpl_external_count;
3721 lq->vpl_count = 0;
3722 lq->vpl_internal_count = 0;
3723 lq->vpl_external_count = 0;
3724 }
3725 assert(queue_empty(&lq->vpl_queue));
3726
3727 if (nolocks == FALSE) {
3728 VPL_UNLOCK(&lq->vpl_lock);
3729 vm_page_unlock_queues();
3730 }
3731 }
3732
3733 /*
3734 * vm_page_part_zero_fill:
3735 *
3736 * Zero-fill a part of the page.
3737 */
3738 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3739 void
3740 vm_page_part_zero_fill(
3741 vm_page_t m,
3742 vm_offset_t m_pa,
3743 vm_size_t len)
3744 {
3745
3746 #if 0
3747 /*
3748 * we don't hold the page queue lock
3749 * so this check isn't safe to make
3750 */
3751 VM_PAGE_CHECK(m);
3752 #endif
3753
3754 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3755 pmap_zero_part_page(m->phys_page, m_pa, len);
3756 #else
3757 vm_page_t tmp;
3758 while (1) {
3759 tmp = vm_page_grab();
3760 if (tmp == VM_PAGE_NULL) {
3761 vm_page_wait(THREAD_UNINT);
3762 continue;
3763 }
3764 break;
3765 }
3766 vm_page_zero_fill(tmp);
3767 if(m_pa != 0) {
3768 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3769 }
3770 if((m_pa + len) < PAGE_SIZE) {
3771 vm_page_part_copy(m, m_pa + len, tmp,
3772 m_pa + len, PAGE_SIZE - (m_pa + len));
3773 }
3774 vm_page_copy(tmp,m);
3775 VM_PAGE_FREE(tmp);
3776 #endif
3777
3778 }
3779
3780 /*
3781 * vm_page_zero_fill:
3782 *
3783 * Zero-fill the specified page.
3784 */
3785 void
3786 vm_page_zero_fill(
3787 vm_page_t m)
3788 {
3789 XPR(XPR_VM_PAGE,
3790 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3791 m->object, m->offset, m, 0,0);
3792 #if 0
3793 /*
3794 * we don't hold the page queue lock
3795 * so this check isn't safe to make
3796 */
3797 VM_PAGE_CHECK(m);
3798 #endif
3799
3800 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3801 pmap_zero_page(m->phys_page);
3802 }
3803
3804 /*
3805 * vm_page_part_copy:
3806 *
3807 * copy part of one page to another
3808 */
3809
3810 void
3811 vm_page_part_copy(
3812 vm_page_t src_m,
3813 vm_offset_t src_pa,
3814 vm_page_t dst_m,
3815 vm_offset_t dst_pa,
3816 vm_size_t len)
3817 {
3818 #if 0
3819 /*
3820 * we don't hold the page queue lock
3821 * so this check isn't safe to make
3822 */
3823 VM_PAGE_CHECK(src_m);
3824 VM_PAGE_CHECK(dst_m);
3825 #endif
3826 pmap_copy_part_page(src_m->phys_page, src_pa,
3827 dst_m->phys_page, dst_pa, len);
3828 }
3829
3830 /*
3831 * vm_page_copy:
3832 *
3833 * Copy one page to another
3834 *
3835 * ENCRYPTED SWAP:
3836 * The source page should not be encrypted. The caller should
3837 * make sure the page is decrypted first, if necessary.
3838 */
3839
3840 int vm_page_copy_cs_validations = 0;
3841 int vm_page_copy_cs_tainted = 0;
3842
3843 void
3844 vm_page_copy(
3845 vm_page_t src_m,
3846 vm_page_t dest_m)
3847 {
3848 XPR(XPR_VM_PAGE,
3849 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3850 src_m->object, src_m->offset,
3851 dest_m->object, dest_m->offset,
3852 0);
3853 #if 0
3854 /*
3855 * we don't hold the page queue lock
3856 * so this check isn't safe to make
3857 */
3858 VM_PAGE_CHECK(src_m);
3859 VM_PAGE_CHECK(dest_m);
3860 #endif
3861 vm_object_lock_assert_held(src_m->object);
3862
3863 /*
3864 * ENCRYPTED SWAP:
3865 * The source page should not be encrypted at this point.
3866 * The destination page will therefore not contain encrypted
3867 * data after the copy.
3868 */
3869 if (src_m->encrypted) {
3870 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3871 }
3872 dest_m->encrypted = FALSE;
3873
3874 if (src_m->object != VM_OBJECT_NULL &&
3875 src_m->object->code_signed) {
3876 /*
3877 * We're copying a page from a code-signed object.
3878 * Whoever ends up mapping the copy page might care about
3879 * the original page's integrity, so let's validate the
3880 * source page now.
3881 */
3882 vm_page_copy_cs_validations++;
3883 vm_page_validate_cs(src_m);
3884 }
3885
3886 if (vm_page_is_slideable(src_m)) {
3887 boolean_t was_busy = src_m->busy;
3888 src_m->busy = TRUE;
3889 (void) vm_page_slide(src_m, 0);
3890 assert(src_m->busy);
3891 if (!was_busy) {
3892 PAGE_WAKEUP_DONE(src_m);
3893 }
3894 }
3895
3896 /*
3897 * Propagate the cs_tainted bit to the copy page. Do not propagate
3898 * the cs_validated bit.
3899 */
3900 dest_m->cs_tainted = src_m->cs_tainted;
3901 if (dest_m->cs_tainted) {
3902 vm_page_copy_cs_tainted++;
3903 }
3904 dest_m->slid = src_m->slid;
3905 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3906 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3907 }
3908
3909 #if MACH_ASSERT
3910 static void
3911 _vm_page_print(
3912 vm_page_t p)
3913 {
3914 printf("vm_page %p: \n", p);
3915 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3916 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3917 printf(" next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
3918 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3919 printf(" wire_count=%u\n", p->wire_count);
3920
3921 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3922 (p->local ? "" : "!"),
3923 (p->inactive ? "" : "!"),
3924 (p->active ? "" : "!"),
3925 (p->pageout_queue ? "" : "!"),
3926 (p->speculative ? "" : "!"),
3927 (p->laundry ? "" : "!"));
3928 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3929 (p->free ? "" : "!"),
3930 (p->reference ? "" : "!"),
3931 (p->gobbled ? "" : "!"),
3932 (p->private ? "" : "!"),
3933 (p->throttled ? "" : "!"));
3934 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3935 (p->busy ? "" : "!"),
3936 (p->wanted ? "" : "!"),
3937 (p->tabled ? "" : "!"),
3938 (p->fictitious ? "" : "!"),
3939 (p->pmapped ? "" : "!"),
3940 (p->wpmapped ? "" : "!"));
3941 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3942 (p->pageout ? "" : "!"),
3943 (p->absent ? "" : "!"),
3944 (p->error ? "" : "!"),
3945 (p->dirty ? "" : "!"),
3946 (p->cleaning ? "" : "!"),
3947 (p->precious ? "" : "!"),
3948 (p->clustered ? "" : "!"));
3949 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3950 (p->overwriting ? "" : "!"),
3951 (p->restart ? "" : "!"),
3952 (p->unusual ? "" : "!"),
3953 (p->encrypted ? "" : "!"),
3954 (p->encrypted_cleaning ? "" : "!"));
3955 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
3956 (p->cs_validated ? "" : "!"),
3957 (p->cs_tainted ? "" : "!"),
3958 (p->cs_nx ? "" : "!"),
3959 (p->no_cache ? "" : "!"));
3960
3961 printf("phys_page=0x%x\n", p->phys_page);
3962 }
3963
3964 /*
3965 * Check that the list of pages is ordered by
3966 * ascending physical address and has no holes.
3967 */
3968 static int
3969 vm_page_verify_contiguous(
3970 vm_page_t pages,
3971 unsigned int npages)
3972 {
3973 register vm_page_t m;
3974 unsigned int page_count;
3975 vm_offset_t prev_addr;
3976
3977 prev_addr = pages->phys_page;
3978 page_count = 1;
3979 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3980 if (m->phys_page != prev_addr + 1) {
3981 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3982 m, (long)prev_addr, m->phys_page);
3983 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3984 panic("vm_page_verify_contiguous: not contiguous!");
3985 }
3986 prev_addr = m->phys_page;
3987 ++page_count;
3988 }
3989 if (page_count != npages) {
3990 printf("pages %p actual count 0x%x but requested 0x%x\n",
3991 pages, page_count, npages);
3992 panic("vm_page_verify_contiguous: count error");
3993 }
3994 return 1;
3995 }
3996
3997
3998 /*
3999 * Check the free lists for proper length etc.
4000 */
4001 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
4002 static unsigned int
4003 vm_page_verify_free_list(
4004 queue_head_t *vm_page_queue,
4005 unsigned int color,
4006 vm_page_t look_for_page,
4007 boolean_t expect_page)
4008 {
4009 unsigned int npages;
4010 vm_page_t m;
4011 vm_page_t prev_m;
4012 boolean_t found_page;
4013
4014 if (! vm_page_verify_this_free_list_enabled)
4015 return 0;
4016
4017 found_page = FALSE;
4018 npages = 0;
4019 prev_m = (vm_page_t) vm_page_queue;
4020 queue_iterate(vm_page_queue,
4021 m,
4022 vm_page_t,
4023 pageq) {
4024
4025 if (m == look_for_page) {
4026 found_page = TRUE;
4027 }
4028 if ((vm_page_t) m->pageq.prev != prev_m)
4029 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
4030 color, npages, m, m->pageq.prev, prev_m);
4031 if ( ! m->busy )
4032 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
4033 color, npages, m);
4034 if (color != (unsigned int) -1) {
4035 if ((m->phys_page & vm_color_mask) != color)
4036 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
4037 color, npages, m, m->phys_page & vm_color_mask, color);
4038 if ( ! m->free )
4039 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
4040 color, npages, m);
4041 }
4042 ++npages;
4043 prev_m = m;
4044 }
4045 if (look_for_page != VM_PAGE_NULL) {
4046 unsigned int other_color;
4047
4048 if (expect_page && !found_page) {
4049 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
4050 color, npages, look_for_page, look_for_page->phys_page);
4051 _vm_page_print(look_for_page);
4052 for (other_color = 0;
4053 other_color < vm_colors;
4054 other_color++) {
4055 if (other_color == color)
4056 continue;
4057 vm_page_verify_free_list(&vm_page_queue_free[other_color],
4058 other_color, look_for_page, FALSE);
4059 }
4060 if (color == (unsigned int) -1) {
4061 vm_page_verify_free_list(&vm_lopage_queue_free,
4062 (unsigned int) -1, look_for_page, FALSE);
4063 }
4064 panic("vm_page_verify_free_list(color=%u)\n", color);
4065 }
4066 if (!expect_page && found_page) {
4067 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
4068 color, npages, look_for_page, look_for_page->phys_page);
4069 }
4070 }
4071 return npages;
4072 }
4073
4074 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
4075 static void
4076 vm_page_verify_free_lists( void )
4077 {
4078 unsigned int color, npages, nlopages;
4079 boolean_t toggle = TRUE;
4080
4081 if (! vm_page_verify_all_free_lists_enabled)
4082 return;
4083
4084 npages = 0;
4085
4086 lck_mtx_lock(&vm_page_queue_free_lock);
4087
4088 if (vm_page_verify_this_free_list_enabled == TRUE) {
4089 /*
4090 * This variable has been set globally for extra checking of
4091 * each free list Q. Since we didn't set it, we don't own it
4092 * and we shouldn't toggle it.
4093 */
4094 toggle = FALSE;
4095 }
4096
4097 if (toggle == TRUE) {
4098 vm_page_verify_this_free_list_enabled = TRUE;
4099 }
4100
4101 for( color = 0; color < vm_colors; color++ ) {
4102 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
4103 color, VM_PAGE_NULL, FALSE);
4104 }
4105 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
4106 (unsigned int) -1,
4107 VM_PAGE_NULL, FALSE);
4108 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
4109 panic("vm_page_verify_free_lists: "
4110 "npages %u free_count %d nlopages %u lo_free_count %u",
4111 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
4112
4113 if (toggle == TRUE) {
4114 vm_page_verify_this_free_list_enabled = FALSE;
4115 }
4116
4117 lck_mtx_unlock(&vm_page_queue_free_lock);
4118 }
4119
4120 void
4121 vm_page_queues_assert(
4122 vm_page_t mem,
4123 int val)
4124 {
4125 #if DEBUG
4126 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
4127 #endif
4128 if (mem->free + mem->active + mem->inactive + mem->speculative +
4129 mem->throttled + mem->pageout_queue > (val)) {
4130 _vm_page_print(mem);
4131 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
4132 }
4133 if (VM_PAGE_WIRED(mem)) {
4134 assert(!mem->active);
4135 assert(!mem->inactive);
4136 assert(!mem->speculative);
4137 assert(!mem->throttled);
4138 assert(!mem->pageout_queue);
4139 }
4140 }
4141 #endif /* MACH_ASSERT */
4142
4143
4144
4145
4146
4147 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4148
4149 /*
4150 * CONTIGUOUS PAGE ALLOCATION
4151 *
4152 * Find a region large enough to contain at least n pages
4153 * of contiguous physical memory.
4154 *
4155 * This is done by traversing the vm_page_t array in a linear fashion
4156 * we assume that the vm_page_t array has the available physical pages in an
4157 * ordered, ascending list... this is currently true of all our implementations
4158 * and must remain so... there can be 'holes' in the array... we also can
4159 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
4160 * which used to happen via 'vm_page_convert'... that function was no longer
4161 * being called and was removed...
4162 *
4163 * The basic flow consists of stabilizing some of the interesting state of
4164 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
4165 * sweep at the beginning of the array looking for pages that meet our criteria
4166 * for a 'stealable' page... currently we are pretty conservative... if the page
4167 * meets this criteria and is physically contiguous to the previous page in the 'run'
4168 * we keep developing it. If we hit a page that doesn't fit, we reset our state
4169 * and start to develop a new run... if at this point we've already considered
4170 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
4171 * and mutex_pause (which will yield the processor), to keep the latency low w/r
4172 * to other threads trying to acquire free pages (or move pages from q to q),
4173 * and then continue from the spot we left off... we only make 1 pass through the
4174 * array. Once we have a 'run' that is long enough, we'll go into the loop
4175 * which steals the pages from the queues they're currently on... pages on the free
4176 * queue can be stolen directly... pages that are on any of the other queues
4177 * must be removed from the object they are tabled on... this requires taking the
4178 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
4179 * or if the state of the page behind the vm_object lock is no longer viable, we'll
4180 * dump the pages we've currently stolen back to the free list, and pick up our
4181 * scan from the point where we aborted the 'current' run.
4182 *
4183 *
4184 * Requirements:
4185 * - neither vm_page_queue nor vm_free_list lock can be held on entry
4186 *
4187 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
4188 *
4189 * Algorithm:
4190 */
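/*
 * Illustrative sketch: the scan phase described above, reduced to its
 * skeleton.  The helper name page_is_stealable() is illustrative only;
 * the real criteria are the checks in vm_page_find_contiguous() below,
 * which also manages the queue/free-list locks and yields periodically
 * via mutex_pause().
 */
#if 0	/* illustrative sketch only -- not compiled */
static unsigned int
find_contiguous_run_sketch(unsigned int contig_pages)
{
	unsigned int	idx, run_start = 0, run_len = 0;
	ppnum_t		prev = (ppnum_t) -2;

	for (idx = 0; idx < vm_pages_count && run_len < contig_pages; idx++) {
		vm_page_t p = &vm_pages[idx];

		if (!page_is_stealable(p)) {
			run_len = 0;			/* run broken: start over */
		} else if (run_len == 0 || p->phys_page != prev + 1) {
			run_start = idx;		/* begin a new run here */
			run_len = 1;
		} else {
			run_len++;			/* extend the current run */
		}
		prev = p->phys_page;
	}
	return (run_len == contig_pages) ? run_start : (unsigned int) -1;
}
#endif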
4191
4192 #define MAX_CONSIDERED_BEFORE_YIELD 1000
4193
4194
4195 #define RESET_STATE_OF_RUN() \
4196 MACRO_BEGIN \
4197 prevcontaddr = -2; \
4198 start_pnum = -1; \
4199 free_considered = 0; \
4200 substitute_needed = 0; \
4201 npages = 0; \
4202 MACRO_END
4203
4204 /*
4205 * Can we steal in-use (i.e. not free) pages when searching for
4206 * physically-contiguous pages ?
4207 */
4208 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
4209
4210 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
4211 #if DEBUG
4212 int vm_page_find_contig_debug = 0;
4213 #endif
4214
4215 static vm_page_t
4216 vm_page_find_contiguous(
4217 unsigned int contig_pages,
4218 ppnum_t max_pnum,
4219 ppnum_t pnum_mask,
4220 boolean_t wire,
4221 int flags)
4222 {
4223 vm_page_t m = NULL;
4224 ppnum_t prevcontaddr;
4225 ppnum_t start_pnum;
4226 unsigned int npages, considered, scanned;
4227 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
4228 unsigned int idx_last_contig_page_found = 0;
4229 int free_considered, free_available;
4230 int substitute_needed;
4231 boolean_t wrapped, zone_gc_called = FALSE;
4232 #if DEBUG
4233 clock_sec_t tv_start_sec, tv_end_sec;
4234 clock_usec_t tv_start_usec, tv_end_usec;
4235 #endif
4236
4237 int yielded = 0;
4238 int dumped_run = 0;
4239 int stolen_pages = 0;
4240 int compressed_pages = 0;
4241
4242
4243 if (contig_pages == 0)
4244 return VM_PAGE_NULL;
4245
4246 full_scan_again:
4247
4248 #if MACH_ASSERT
4249 vm_page_verify_free_lists();
4250 #endif
4251 #if DEBUG
4252 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
4253 #endif
4254 PAGE_REPLACEMENT_ALLOWED(TRUE);
4255
4256 vm_page_lock_queues();
4257
4258
4259 lck_mtx_lock(&vm_page_queue_free_lock);
4260
4261 RESET_STATE_OF_RUN();
4262
4263 scanned = 0;
4264 considered = 0;
4265 free_available = vm_page_free_count - vm_page_free_reserved;
4266
4267 wrapped = FALSE;
4268
4269 if(flags & KMA_LOMEM)
4270 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
4271 else
4272 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
4273
4274 orig_last_idx = idx_last_contig_page_found;
4275 last_idx = orig_last_idx;
4276
4277 for (page_idx = last_idx, start_idx = last_idx;
4278 npages < contig_pages && page_idx < vm_pages_count;
4279 page_idx++) {
4280 retry:
4281 if (wrapped &&
4282 npages == 0 &&
4283 page_idx >= orig_last_idx) {
4284 /*
4285 * We're back where we started and we haven't
4286 * found any suitable contiguous range. Let's
4287 * give up.
4288 */
4289 break;
4290 }
4291 scanned++;
4292 m = &vm_pages[page_idx];
4293
4294 assert(!m->fictitious);
4295 assert(!m->private);
4296
4297 if (max_pnum && m->phys_page > max_pnum) {
4298 /* no more low pages... */
4299 break;
4300 }
4301 if (npages == 0 && ((m->phys_page & pnum_mask) != 0)) {
4302 /*
4303 * not aligned
4304 */
4305 RESET_STATE_OF_RUN();
4306
4307 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
4308 m->encrypted_cleaning ||
4309 m->pageout_queue || m->laundry || m->wanted ||
4310 m->cleaning || m->overwriting || m->pageout) {
4311 /*
4312 * page is in a transient state
4313 * or a state we don't want to deal
4314 * with, so don't consider it which
4315 * means starting a new run
4316 */
4317 RESET_STATE_OF_RUN();
4318
4319 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
4320 /*
4321 * page needs to be on one of our queues
4322 * or it needs to belong to the compressor pool
4323 * in order for it to be stable behind the
4324 * locks we hold at this point...
4325 * if not, don't consider it which
4326 * means starting a new run
4327 */
4328 RESET_STATE_OF_RUN();
4329
4330 } else if (!m->free && (!m->tabled || m->busy)) {
4331 /*
4332 * pages on the free list are always 'busy'
4333 * so we couldn't test for 'busy' in the check
4334 * for the transient states... pages that are
4335 * 'free' are never 'tabled', so we also couldn't
4336 * test for 'tabled'. So we check here to make
4337 * sure that a non-free page is not busy and is
4338 * tabled on an object...
4339 * if not, don't consider it which
4340 * means starting a new run
4341 */
4342 RESET_STATE_OF_RUN();
4343
4344 } else {
4345 if (m->phys_page != prevcontaddr + 1) {
4346 if ((m->phys_page & pnum_mask) != 0) {
4347 RESET_STATE_OF_RUN();
4348 goto did_consider;
4349 } else {
4350 npages = 1;
4351 start_idx = page_idx;
4352 start_pnum = m->phys_page;
4353 }
4354 } else {
4355 npages++;
4356 }
4357 prevcontaddr = m->phys_page;
4358
4359 VM_PAGE_CHECK(m);
4360 if (m->free) {
4361 free_considered++;
4362 } else {
4363 /*
4364 * This page is not free.
4365 * If we can't steal used pages,
4366 * we have to give up this run
4367 * and keep looking.
4368 * Otherwise, we might need to
4369 * move the contents of this page
4370 * into a substitute page.
4371 */
4372 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4373 if (m->pmapped || m->dirty || m->precious) {
4374 substitute_needed++;
4375 }
4376 #else
4377 RESET_STATE_OF_RUN();
4378 #endif
4379 }
4380
4381 if ((free_considered + substitute_needed) > free_available) {
4382 /*
4383 * if we let this run continue
4384 * we will end up dropping the vm_page_free_count
4385 * below the reserve limit... we need to abort
4386 * this run, but we can at least re-consider this
4387 * page... thus the jump back to 'retry'
4388 */
4389 RESET_STATE_OF_RUN();
4390
4391 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4392 considered++;
4393 goto retry;
4394 }
4395 /*
4396 * free_available == 0
4397 * so can't consider any free pages... if
4398 * we went to retry in this case, we'd
4399 * get stuck looking at the same page
4400 * w/o making any forward progress
4401 * we also want to take this path if we've already
4402 * reached our limit that controls the lock latency
4403 */
4404 }
4405 }
4406 did_consider:
4407 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
4408
4409 PAGE_REPLACEMENT_ALLOWED(FALSE);
4410
4411 lck_mtx_unlock(&vm_page_queue_free_lock);
4412 vm_page_unlock_queues();
4413
4414 mutex_pause(0);
4415
4416 PAGE_REPLACEMENT_ALLOWED(TRUE);
4417
4418 vm_page_lock_queues();
4419 lck_mtx_lock(&vm_page_queue_free_lock);
4420
4421 RESET_STATE_OF_RUN();
4422 /*
4423 * reset our free page limit since we
4424 * dropped the lock protecting the vm_page_free_queue
4425 */
4426 free_available = vm_page_free_count - vm_page_free_reserved;
4427 considered = 0;
4428
4429 yielded++;
4430
4431 goto retry;
4432 }
4433 considered++;
4434 }
4435 m = VM_PAGE_NULL;
4436
4437 if (npages != contig_pages) {
4438 if (!wrapped) {
4439 /*
4440 * We didn't find a contiguous range but we didn't
4441 * start from the very first page.
4442 * Start again from the very first page.
4443 */
4444 RESET_STATE_OF_RUN();
4445 if( flags & KMA_LOMEM)
4446 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4447 else
4448 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4449 last_idx = 0;
4450 page_idx = last_idx;
4451 wrapped = TRUE;
4452 goto retry;
4453 }
4454 lck_mtx_unlock(&vm_page_queue_free_lock);
4455 } else {
4456 vm_page_t m1;
4457 vm_page_t m2;
4458 unsigned int cur_idx;
4459 unsigned int tmp_start_idx;
4460 vm_object_t locked_object = VM_OBJECT_NULL;
4461 boolean_t abort_run = FALSE;
4462
4463 assert(page_idx - start_idx == contig_pages);
4464
4465 tmp_start_idx = start_idx;
4466
4467 /*
4468 * first pass through to pull the free pages
4469 * off of the free queue so that in case we
4470 * need substitute pages, we won't grab any
4471 * of the free pages in the run... we'll clear
4472 * the 'free' bit in the 2nd pass, and even in
4473 * an abort_run case, we'll collect all of the
4474 * free pages in this run and return them to the free list
4475 */
4476 while (start_idx < page_idx) {
4477
4478 m1 = &vm_pages[start_idx++];
4479
4480 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4481 assert(m1->free);
4482 #endif
4483
4484 if (m1->free) {
4485 unsigned int color;
4486
4487 color = m1->phys_page & vm_color_mask;
4488 #if MACH_ASSERT
4489 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
4490 #endif
4491 queue_remove(&vm_page_queue_free[color],
4492 m1,
4493 vm_page_t,
4494 pageq);
4495 m1->pageq.next = NULL;
4496 m1->pageq.prev = NULL;
4497 #if MACH_ASSERT
4498 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
4499 #endif
4500 /*
4501 * Clear the "free" bit so that this page
4502 * does not get considered for another
4503 * concurrent physically-contiguous allocation.
4504 */
4505 m1->free = FALSE;
4506 assert(m1->busy);
4507
4508 vm_page_free_count--;
4509 }
4510 }
4511 if( flags & KMA_LOMEM)
4512 vm_page_lomem_find_contiguous_last_idx = page_idx;
4513 else
4514 vm_page_find_contiguous_last_idx = page_idx;
4515
4516 /*
4517 * we can drop the free queue lock at this point since
4518 * we've pulled any 'free' candidates off of the list
4519 * we need it dropped so that we can do a vm_page_grab
4520 * when substituting for pmapped/dirty pages
4521 */
4522 lck_mtx_unlock(&vm_page_queue_free_lock);
4523
4524 start_idx = tmp_start_idx;
4525 cur_idx = page_idx - 1;
4526
4527 while (start_idx++ < page_idx) {
4528 /*
4529 * must go through the list from back to front
4530 * so that the page list is created in the
4531 * correct order - low -> high phys addresses
4532 */
4533 m1 = &vm_pages[cur_idx--];
4534
4535 assert(!m1->free);
4536
4537 if (m1->object == VM_OBJECT_NULL) {
4538 /*
4539 * page has already been removed from
4540 * the free list in the 1st pass
4541 */
4542 assert(m1->offset == (vm_object_offset_t) -1);
4543 assert(m1->busy);
4544 assert(!m1->wanted);
4545 assert(!m1->laundry);
4546 } else {
4547 vm_object_t object;
4548 int refmod;
4549 boolean_t disconnected, reusable;
4550
4551 if (abort_run == TRUE)
4552 continue;
4553
4554 object = m1->object;
4555
4556 if (object != locked_object) {
4557 if (locked_object) {
4558 vm_object_unlock(locked_object);
4559 locked_object = VM_OBJECT_NULL;
4560 }
4561 if (vm_object_lock_try(object))
4562 locked_object = object;
4563 }
4564 if (locked_object == VM_OBJECT_NULL ||
4565 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4566 m1->encrypted_cleaning ||
4567 m1->pageout_queue || m1->laundry || m1->wanted ||
4568 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
4569
4570 if (locked_object) {
4571 vm_object_unlock(locked_object);
4572 locked_object = VM_OBJECT_NULL;
4573 }
4574 tmp_start_idx = cur_idx;
4575 abort_run = TRUE;
4576 continue;
4577 }
4578
4579 disconnected = FALSE;
4580 reusable = FALSE;
4581
4582 if ((m1->reusable ||
4583 m1->object->all_reusable) &&
4584 m1->inactive &&
4585 !m1->dirty &&
4586 !m1->reference) {
4587 /* reusable page... */
4588 refmod = pmap_disconnect(m1->phys_page);
4589 disconnected = TRUE;
4590 if (refmod == 0) {
4591 /*
4592 * ... not reused: can steal
4593 * without relocating contents.
4594 */
4595 reusable = TRUE;
4596 }
4597 }
4598
4599 if ((m1->pmapped &&
4600 ! reusable) ||
4601 m1->dirty ||
4602 m1->precious) {
4603 vm_object_offset_t offset;
4604
4605 m2 = vm_page_grab();
4606
4607 if (m2 == VM_PAGE_NULL) {
4608 if (locked_object) {
4609 vm_object_unlock(locked_object);
4610 locked_object = VM_OBJECT_NULL;
4611 }
4612 tmp_start_idx = cur_idx;
4613 abort_run = TRUE;
4614 continue;
4615 }
4616 if (! disconnected) {
4617 if (m1->pmapped)
4618 refmod = pmap_disconnect(m1->phys_page);
4619 else
4620 refmod = 0;
4621 }
4622
4623 /* copy the page's contents */
4624 pmap_copy_page(m1->phys_page, m2->phys_page);
4625 /* copy the page's state */
4626 assert(!VM_PAGE_WIRED(m1));
4627 assert(!m1->free);
4628 assert(!m1->pageout_queue);
4629 assert(!m1->laundry);
4630 m2->reference = m1->reference;
4631 assert(!m1->gobbled);
4632 assert(!m1->private);
4633 m2->no_cache = m1->no_cache;
4634 m2->xpmapped = 0;
4635 assert(!m1->busy);
4636 assert(!m1->wanted);
4637 assert(!m1->fictitious);
4638 m2->pmapped = m1->pmapped; /* should flush cache ? */
4639 m2->wpmapped = m1->wpmapped;
4640 assert(!m1->pageout);
4641 m2->absent = m1->absent;
4642 m2->error = m1->error;
4643 m2->dirty = m1->dirty;
4644 assert(!m1->cleaning);
4645 m2->precious = m1->precious;
4646 m2->clustered = m1->clustered;
4647 assert(!m1->overwriting);
4648 m2->restart = m1->restart;
4649 m2->unusual = m1->unusual;
4650 m2->encrypted = m1->encrypted;
4651 assert(!m1->encrypted_cleaning);
4652 m2->cs_validated = m1->cs_validated;
4653 m2->cs_tainted = m1->cs_tainted;
4654 m2->cs_nx = m1->cs_nx;
4655
4656 /*
4657 * If m1 had really been reusable,
4658 * we would have just stolen it, so
4659 * let's not propagate its "reusable"
4660 * bit and assert that m2 is not
4661 * marked as "reusable".
4662 */
4663 // m2->reusable = m1->reusable;
4664 assert(!m2->reusable);
4665
4666 assert(!m1->lopage);
4667 m2->slid = m1->slid;
4668 m2->compressor = m1->compressor;
4669
4670 /*
4671 * page may need to be flushed if
4672 * it is marshalled into a UPL
4673 * that is going to be used by a device
4674 * that doesn't support coherency
4675 */
4676 m2->written_by_kernel = TRUE;
4677
4678 /*
4679 * make sure we clear the ref/mod state
4680 * from the pmap layer... else we risk
4681 * inheriting state from the last time
4682 * this page was used...
4683 */
4684 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4685
4686 if (refmod & VM_MEM_REFERENCED)
4687 m2->reference = TRUE;
4688 if (refmod & VM_MEM_MODIFIED) {
4689 SET_PAGE_DIRTY(m2, TRUE);
4690 }
4691 offset = m1->offset;
4692
4693 /*
4694 * completely cleans up the state
4695 * of the page so that it is ready
4696 * to be put onto the free list... for
4697 * our purposes here, it looks as if it
4698 * just came off of the free list
4699 */
4700 vm_page_free_prepare(m1);
4701
4702 /*
4703 * now put the substitute page
4704 * on the object
4705 */
4706 vm_page_insert_internal(m2, locked_object, offset, VM_KERN_MEMORY_NONE, TRUE, TRUE, FALSE, FALSE, NULL);
4707
4708 if (m2->compressor) {
4709 m2->pmapped = TRUE;
4710 m2->wpmapped = TRUE;
4711
4712 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4713 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4714
4715 compressed_pages++;
4716
4717 } else {
4718 if (m2->reference)
4719 vm_page_activate(m2);
4720 else
4721 vm_page_deactivate(m2);
4722 }
4723 PAGE_WAKEUP_DONE(m2);
4724
4725 } else {
4726 assert(!m1->compressor);
4727
4728 /*
4729 * completely cleans up the state
4730 * of the page so that it is ready
4731 * to be put onto the free list... for
4732 * our purposes here, it looks as if it
4733 * just came off of the free list
4734 */
4735 vm_page_free_prepare(m1);
4736 }
4737
4738 stolen_pages++;
4739
4740 }
4741 m1->pageq.next = (queue_entry_t) m;
4742 m1->pageq.prev = NULL;
4743 m = m1;
4744 }
4745 if (locked_object) {
4746 vm_object_unlock(locked_object);
4747 locked_object = VM_OBJECT_NULL;
4748 }
4749
4750 if (abort_run == TRUE) {
4751 if (m != VM_PAGE_NULL) {
4752 vm_page_free_list(m, FALSE);
4753 }
4754
4755 dumped_run++;
4756
4757 /*
4758 * want the index of the last
4759 * page in this run that was
4760 * successfully 'stolen', so back
4761 * it up 1 for the auto-decrement on use
4762 * and 1 more to bump back over this page
4763 */
4764 page_idx = tmp_start_idx + 2;
4765 if (page_idx >= vm_pages_count) {
4766 if (wrapped)
4767 goto done_scanning;
4768 page_idx = last_idx = 0;
4769 wrapped = TRUE;
4770 }
4771 abort_run = FALSE;
4772
4773 /*
4774 * The run we were building was aborted;
4775 * reset our scan state and resume just past
4776 * the point where the abort occurred.
4777 */
4778 RESET_STATE_OF_RUN();
4779
4780 if( flags & KMA_LOMEM)
4781 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4782 else
4783 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4784
4785 last_idx = page_idx;
4786
4787 lck_mtx_lock(&vm_page_queue_free_lock);
4788 /*
4789 * reset our free page limit since we
4790 * dropped the lock protecting the vm_page_free_queue
4791 */
4792 free_available = vm_page_free_count - vm_page_free_reserved;
4793 goto retry;
4794 }
4795
4796 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4797
4798 if (wire == TRUE)
4799 m1->wire_count++;
4800 else
4801 m1->gobbled = TRUE;
4802 }
4803 if (wire == FALSE)
4804 vm_page_gobble_count += npages;
4805
4806 /*
4807 * gobbled pages are also counted as wired pages
4808 */
4809 vm_page_wire_count += npages;
4810
4811 assert(vm_page_verify_contiguous(m, npages));
4812 }
4813 done_scanning:
4814 PAGE_REPLACEMENT_ALLOWED(FALSE);
4815
4816 vm_page_unlock_queues();
4817
4818 #if DEBUG
4819 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4820
4821 tv_end_sec -= tv_start_sec;
4822 if (tv_end_usec < tv_start_usec) {
4823 tv_end_sec--;
4824 tv_end_usec += 1000000;
4825 }
4826 tv_end_usec -= tv_start_usec;
4827 if (tv_end_usec >= 1000000) {
4828 tv_end_sec++;
4829 tv_end_usec -= 1000000;
4830 }
4831 if (vm_page_find_contig_debug) {
4832 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4833 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4834 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4835 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
4836 }
4837
4838 #endif
4839 #if MACH_ASSERT
4840 vm_page_verify_free_lists();
4841 #endif
4842 if (m == NULL && zone_gc_called == FALSE) {
4843 printf("%s(num=%d,low=%d): found %d pages at 0x%llx...scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages... wired count is %d\n",
4844 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4845 scanned, yielded, dumped_run, stolen_pages, compressed_pages, vm_page_wire_count);
4846
4847 if (consider_buffer_cache_collect != NULL) {
4848 (void)(*consider_buffer_cache_collect)(1);
4849 }
4850
4851 consider_zone_gc(TRUE);
4852
4853 zone_gc_called = TRUE;
4854
4855 printf("vm_page_find_contiguous: zone_gc called... wired count is %d\n", vm_page_wire_count);
4856 goto full_scan_again;
4857 }
4858
4859 return m;
4860 }
4861
4862 /*
4863 * Allocate a list of contiguous, wired pages.
4864 */
4865 kern_return_t
4866 cpm_allocate(
4867 vm_size_t size,
4868 vm_page_t *list,
4869 ppnum_t max_pnum,
4870 ppnum_t pnum_mask,
4871 boolean_t wire,
4872 int flags)
4873 {
4874 vm_page_t pages;
4875 unsigned int npages;
4876
4877 if (size % PAGE_SIZE != 0)
4878 return KERN_INVALID_ARGUMENT;
4879
4880 npages = (unsigned int) (size / PAGE_SIZE);
4881 if (npages != size / PAGE_SIZE) {
4882 /* 32-bit overflow */
4883 return KERN_INVALID_ARGUMENT;
4884 }
4885
4886 /*
4887 * Obtain a pointer to a subset of the free
4888 * list large enough to satisfy the request;
4889 * the region will be physically contiguous.
4890 */
4891 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4892
4893 if (pages == VM_PAGE_NULL)
4894 return KERN_NO_SPACE;
4895 /*
4896 * determine need for wakeups
4897 */
4898 if ((vm_page_free_count < vm_page_free_min) ||
4899 ((vm_page_free_count < vm_page_free_target) &&
4900 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4901 thread_wakeup((event_t) &vm_page_free_wanted);
4902
4903 VM_CHECK_MEMORYSTATUS;
4904
4905 /*
4906 * The CPM pages should now be available and
4907 * ordered by ascending physical address.
4908 */
4909 assert(vm_page_verify_contiguous(pages, npages));
4910
4911 *list = pages;
4912 return KERN_SUCCESS;
4913 }
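/*
 * Illustrative usage sketch: how a caller might request a physically
 * contiguous, wired run of pages and walk the returned list.  The size
 * and constraint values are examples only, and the wrapper function is
 * hypothetical.
 */
#if 0	/* illustrative sketch only -- not compiled */
static kern_return_t
cpm_allocate_sketch(void)
{
	vm_page_t	pages, p;
	kern_return_t	kr;

	kr = cpm_allocate(16 * PAGE_SIZE,	/* must be a multiple of PAGE_SIZE */
			  &pages,
			  0,			/* max_pnum: 0 == no upper bound */
			  0,			/* pnum_mask: no alignment required */
			  TRUE,			/* wire the pages */
			  0);			/* flags */

	if (kr == KERN_SUCCESS) {
		/* pages are ordered by ascending physical address */
		for (p = pages; p != VM_PAGE_NULL; p = NEXT_PAGE(p)) {
			/* ... use p->phys_page ... */
		}
	}
	return kr;
}
#endif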
4914
4915
4916 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4917
4918 /*
4919 * when working on a 'run' of pages, it is necessary to hold
4920 * the vm_page_queue_lock (a hot global lock) for certain operations
4921 * on the page... however, the majority of the work can be done
4922 * while merely holding the object lock... in fact there are certain
4923 * collections of pages that don't require any work brokered by the
4924 * vm_page_queue_lock... to mitigate the time spent behind the global
4925 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4926 * while doing all of the work that doesn't require the vm_page_queue_lock...
4927 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4928 * necessary work for each page... we will grab the busy bit on the page
4929 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4930 * if it can't immediately take the vm_page_queue_lock in order to compete
4931 * for the locks in the same order that vm_pageout_scan takes them.
4932 * the operation names are modeled after the names of the routines that
4933 * need to be called in order to make the changes very obvious in the
4934 * original loop
4935 */
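/*
 * Illustrative sketch of the caller-side pattern described above: gather
 * work items while holding only the object lock, then hand the batch to
 * vm_page_do_delayed_work().  The choice of DW_vm_page_activate, the use
 * of DEFAULT_DELAYED_WORK_LIMIT as the batch size, and the simple
 * 'stop when full' handling are illustrative; real callers also mark each
 * page busy and flush the batch so they can keep iterating safely.
 */
#if 0	/* illustrative sketch only -- not compiled */
static void
delayed_work_batch_sketch(vm_object_t object)
{
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	vm_page_t			m;

	vm_object_lock(object);

	queue_iterate(&object->memq, m, vm_page_t, listq) {
		dwp->dw_m = m;
		dwp->dw_mask = DW_vm_page_activate;	/* example operation */
		dwp++;
		if (++dw_count >= DEFAULT_DELAYED_WORK_LIMIT)
			break;				/* batch is full */
	}
	if (dw_count)
		vm_page_do_delayed_work(object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);

	vm_object_unlock(object);
}
#endif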
4936
4937 void
4938 vm_page_do_delayed_work(
4939 vm_object_t object,
4940 vm_tag_t tag,
4941 struct vm_page_delayed_work *dwp,
4942 int dw_count)
4943 {
4944 int j;
4945 vm_page_t m;
4946 vm_page_t local_free_q = VM_PAGE_NULL;
4947
4948 /*
4949 * pageout_scan takes the vm_page_lock_queues first
4950 * then tries for the object lock... to avoid what
4951 * is effectively a lock inversion, we'll go to the
4952 * trouble of taking them in that same order... otherwise
4953 * if this object contains the majority of the pages resident
4954 * in the UBC (or a small set of large objects actively being
4955 * worked on contain the majority of the pages), we could
4956 * cause the pageout_scan thread to 'starve' in its attempt
4957 * to find pages to move to the free queue, since it has to
4958 * successfully acquire the object lock of any candidate page
4959 * before it can steal/clean it.
4960 */
4961 if (!vm_page_trylockspin_queues()) {
4962 vm_object_unlock(object);
4963
4964 vm_page_lockspin_queues();
4965
4966 for (j = 0; ; j++) {
4967 if (!vm_object_lock_avoid(object) &&
4968 _vm_object_lock_try(object))
4969 break;
4970 vm_page_unlock_queues();
4971 mutex_pause(j);
4972 vm_page_lockspin_queues();
4973 }
4974 }
4975 for (j = 0; j < dw_count; j++, dwp++) {
4976
4977 m = dwp->dw_m;
4978
4979 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4980 vm_pageout_throttle_up(m);
4981 #if CONFIG_PHANTOM_CACHE
4982 if (dwp->dw_mask & DW_vm_phantom_cache_update)
4983 vm_phantom_cache_update(m);
4984 #endif
4985 if (dwp->dw_mask & DW_vm_page_wire)
4986 vm_page_wire(m, tag, FALSE);
4987 else if (dwp->dw_mask & DW_vm_page_unwire) {
4988 boolean_t queueit;
4989
4990 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
4991
4992 vm_page_unwire(m, queueit);
4993 }
4994 if (dwp->dw_mask & DW_vm_page_free) {
4995 vm_page_free_prepare_queues(m);
4996
4997 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4998 /*
4999 * Add this page to our list of reclaimed pages,
5000 * to be freed later.
5001 */
5002 m->pageq.next = (queue_entry_t) local_free_q;
5003 local_free_q = m;
5004 } else {
5005 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
5006 vm_page_deactivate_internal(m, FALSE);
5007 else if (dwp->dw_mask & DW_vm_page_activate) {
5008 if (m->active == FALSE) {
5009 vm_page_activate(m);
5010 }
5011 }
5012 else if (dwp->dw_mask & DW_vm_page_speculate)
5013 vm_page_speculate(m, TRUE);
5014 else if (dwp->dw_mask & DW_enqueue_cleaned) {
5015 /*
5016 * if we didn't hold the object lock and did this,
5017 * we might disconnect the page, then someone might
5018 * soft fault it back in, then we would put it on the
5019 * cleaned queue, and so we would have a referenced (maybe even dirty)
5020 * page on that queue, which we don't want
5021 */
5022 int refmod_state = pmap_disconnect(m->phys_page);
5023
5024 if ((refmod_state & VM_MEM_REFERENCED)) {
5025 /*
5026 * this page has been touched since it got cleaned; let's activate it
5027 * if it hasn't already been
5028 */
5029 vm_pageout_enqueued_cleaned++;
5030 vm_pageout_cleaned_reactivated++;
5031 vm_pageout_cleaned_commit_reactivated++;
5032
5033 if (m->active == FALSE)
5034 vm_page_activate(m);
5035 } else {
5036 m->reference = FALSE;
5037 vm_page_enqueue_cleaned(m);
5038 }
5039 }
5040 else if (dwp->dw_mask & DW_vm_page_lru)
5041 vm_page_lru(m);
5042 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
5043 if ( !m->pageout_queue)
5044 vm_page_queues_remove(m);
5045 }
5046 if (dwp->dw_mask & DW_set_reference)
5047 m->reference = TRUE;
5048 else if (dwp->dw_mask & DW_clear_reference)
5049 m->reference = FALSE;
5050
5051 if (dwp->dw_mask & DW_move_page) {
5052 if ( !m->pageout_queue) {
5053 vm_page_queues_remove(m);
5054
5055 assert(m->object != kernel_object);
5056
5057 vm_page_enqueue_inactive(m, FALSE);
5058 }
5059 }
5060 if (dwp->dw_mask & DW_clear_busy)
5061 m->busy = FALSE;
5062
5063 if (dwp->dw_mask & DW_PAGE_WAKEUP)
5064 PAGE_WAKEUP(m);
5065 }
5066 }
5067 vm_page_unlock_queues();
5068
5069 if (local_free_q)
5070 vm_page_free_list(local_free_q, TRUE);
5071
5072 VM_CHECK_MEMORYSTATUS;
5073
5074 }
5075
5076 kern_return_t
5077 vm_page_alloc_list(
5078 int page_count,
5079 int flags,
5080 vm_page_t *list)
5081 {
5082 vm_page_t lo_page_list = VM_PAGE_NULL;
5083 vm_page_t mem;
5084 int i;
5085
5086 if ( !(flags & KMA_LOMEM))
5087 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
5088
5089 for (i = 0; i < page_count; i++) {
5090
5091 mem = vm_page_grablo();
5092
5093 if (mem == VM_PAGE_NULL) {
5094 if (lo_page_list)
5095 vm_page_free_list(lo_page_list, FALSE);
5096
5097 *list = VM_PAGE_NULL;
5098
5099 return (KERN_RESOURCE_SHORTAGE);
5100 }
5101 mem->pageq.next = (queue_entry_t) lo_page_list;
5102 lo_page_list = mem;
5103 }
5104 *list = lo_page_list;
5105
5106 return (KERN_SUCCESS);
5107 }
5108
5109 void
5110 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
5111 {
5112 page->offset = offset;
5113 }
5114
5115 vm_page_t
5116 vm_page_get_next(vm_page_t page)
5117 {
5118 return ((vm_page_t) page->pageq.next);
5119 }
5120
5121 vm_object_offset_t
5122 vm_page_get_offset(vm_page_t page)
5123 {
5124 return (page->offset);
5125 }
5126
5127 ppnum_t
5128 vm_page_get_phys_page(vm_page_t page)
5129 {
5130 return (page->phys_page);
5131 }
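/*
 * Illustrative usage sketch: allocating a list of low-memory pages with
 * vm_page_alloc_list() and walking it with the accessors above.  The page
 * count is an example value and the wrapper function is hypothetical.
 */
#if 0	/* illustrative sketch only -- not compiled */
static kern_return_t
lopage_list_sketch(void)
{
	vm_page_t	list, p;
	kern_return_t	kr;

	kr = vm_page_alloc_list(8, KMA_LOMEM, &list);

	if (kr == KERN_SUCCESS) {
		for (p = list; p != VM_PAGE_NULL; p = vm_page_get_next(p)) {
			ppnum_t pn = vm_page_get_phys_page(p);
			/* ... hand 'pn' to hardware that needs low physical pages ... */
			(void) pn;
		}
	}
	return kr;
}
#endif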
5132
5133
5134 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
5135
5136 #if HIBERNATION
5137
5138 static vm_page_t hibernate_gobble_queue;
5139
5140 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
5141 static int hibernate_flush_dirty_pages(int);
5142 static int hibernate_flush_queue(queue_head_t *, int);
5143
5144 void hibernate_flush_wait(void);
5145 void hibernate_mark_in_progress(void);
5146 void hibernate_clear_in_progress(void);
5147
5148 void hibernate_free_range(int, int);
5149 void hibernate_hash_insert_page(vm_page_t);
5150 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
5151 void hibernate_rebuild_vm_structs(void);
5152 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
5153 ppnum_t hibernate_lookup_paddr(unsigned int);
5154
5155 struct hibernate_statistics {
5156 int hibernate_considered;
5157 int hibernate_reentered_on_q;
5158 int hibernate_found_dirty;
5159 int hibernate_skipped_cleaning;
5160 int hibernate_skipped_transient;
5161 int hibernate_skipped_precious;
5162 int hibernate_skipped_external;
5163 int hibernate_queue_nolock;
5164 int hibernate_queue_paused;
5165 int hibernate_throttled;
5166 int hibernate_throttle_timeout;
5167 int hibernate_drained;
5168 int hibernate_drain_timeout;
5169 int cd_lock_failed;
5170 int cd_found_precious;
5171 int cd_found_wired;
5172 int cd_found_busy;
5173 int cd_found_unusual;
5174 int cd_found_cleaning;
5175 int cd_found_laundry;
5176 int cd_found_dirty;
5177 int cd_found_xpmapped;
5178 int cd_skipped_xpmapped;
5179 int cd_local_free;
5180 int cd_total_free;
5181 int cd_vm_page_wire_count;
5182 int cd_vm_struct_pages_unneeded;
5183 int cd_pages;
5184 int cd_discarded;
5185 int cd_count_wire;
5186 } hibernate_stats;
5187
5188
5189 /*
5190 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
5191 * so that we don't overrun the estimated image size, which would
5192 * result in a hibernation failure.
5193 */
5194 #define HIBERNATE_XPMAPPED_LIMIT 40000
5195
5196
5197 static int
5198 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
5199 {
5200 wait_result_t wait_result;
5201
5202 vm_page_lock_queues();
5203
5204 while ( !queue_empty(&q->pgo_pending) ) {
5205
5206 q->pgo_draining = TRUE;
5207
5208 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
5209
5210 vm_page_unlock_queues();
5211
5212 wait_result = thread_block(THREAD_CONTINUE_NULL);
5213
5214 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
5215 hibernate_stats.hibernate_drain_timeout++;
5216
5217 if (q == &vm_pageout_queue_external)
5218 return (0);
5219
5220 return (1);
5221 }
5222 vm_page_lock_queues();
5223
5224 hibernate_stats.hibernate_drained++;
5225 }
5226 vm_page_unlock_queues();
5227
5228 return (0);
5229 }
5230
5231
5232 boolean_t hibernate_skip_external = FALSE;
5233
5234 static int
5235 hibernate_flush_queue(queue_head_t *q, int qcount)
5236 {
5237 vm_page_t m;
5238 vm_object_t l_object = NULL;
5239 vm_object_t m_object = NULL;
5240 int refmod_state = 0;
5241 int try_failed_count = 0;
5242 int retval = 0;
5243 int current_run = 0;
5244 struct vm_pageout_queue *iq;
5245 struct vm_pageout_queue *eq;
5246 struct vm_pageout_queue *tq;
5247
5248
5249 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
5250
5251 iq = &vm_pageout_queue_internal;
5252 eq = &vm_pageout_queue_external;
5253
5254 vm_page_lock_queues();
5255
5256 while (qcount && !queue_empty(q)) {
5257
5258 if (current_run++ == 1000) {
5259 if (hibernate_should_abort()) {
5260 retval = 1;
5261 break;
5262 }
5263 current_run = 0;
5264 }
5265
5266 m = (vm_page_t) queue_first(q);
5267 m_object = m->object;
5268
5269 /*
5270 * check to see if we currently are working
5271 * with the same object... if so, we've
5272 * already got the lock
5273 */
5274 if (m_object != l_object) {
5275 /*
5276 * the object associated with candidate page is
5277 * different from the one we were just working
5278 * with... dump the lock if we still own it
5279 */
5280 if (l_object != NULL) {
5281 vm_object_unlock(l_object);
5282 l_object = NULL;
5283 }
5284 /*
5285 * Try to lock object; since we've already got the
5286 * page queues lock, we can only 'try' for this one.
5287 * if the 'try' fails, we need to do a mutex_pause
5288 * to allow the owner of the object lock a chance to
5289 * run...
5290 */
5291 if ( !vm_object_lock_try_scan(m_object)) {
5292
5293 if (try_failed_count > 20) {
5294 hibernate_stats.hibernate_queue_nolock++;
5295
5296 goto reenter_pg_on_q;
5297 }
5298
5299 vm_page_unlock_queues();
5300 mutex_pause(try_failed_count++);
5301 vm_page_lock_queues();
5302
5303 hibernate_stats.hibernate_queue_paused++;
5304 continue;
5305 } else {
5306 l_object = m_object;
5307 }
5308 }
5309 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
5310 /*
5311 * page is not to be cleaned
5312 * move it to the tail of its queue
5313 */
5314 if (m->cleaning)
5315 hibernate_stats.hibernate_skipped_cleaning++;
5316 else
5317 hibernate_stats.hibernate_skipped_transient++;
5318
5319 goto reenter_pg_on_q;
5320 }
5321 if (m_object->copy == VM_OBJECT_NULL) {
5322 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
5323 /*
5324 * let the normal hibernate image path
5325 * deal with these
5326 */
5327 goto reenter_pg_on_q;
5328 }
5329 }
5330 if ( !m->dirty && m->pmapped) {
5331 refmod_state = pmap_get_refmod(m->phys_page);
5332
5333 if ((refmod_state & VM_MEM_MODIFIED)) {
5334 SET_PAGE_DIRTY(m, FALSE);
5335 }
5336 } else
5337 refmod_state = 0;
5338
5339 if ( !m->dirty) {
5340 /*
5341 * page is not to be cleaned
5342 * move it to the tail of its queue
5343 */
5344 if (m->precious)
5345 hibernate_stats.hibernate_skipped_precious++;
5346
5347 goto reenter_pg_on_q;
5348 }
5349
5350 if (hibernate_skip_external == TRUE && !m_object->internal) {
5351
5352 hibernate_stats.hibernate_skipped_external++;
5353
5354 goto reenter_pg_on_q;
5355 }
5356 tq = NULL;
5357
5358 if (m_object->internal) {
5359 if (VM_PAGE_Q_THROTTLED(iq))
5360 tq = iq;
5361 } else if (VM_PAGE_Q_THROTTLED(eq))
5362 tq = eq;
5363
5364 if (tq != NULL) {
5365 wait_result_t wait_result;
5366 int wait_count = 5;
5367
5368 if (l_object != NULL) {
5369 vm_object_unlock(l_object);
5370 l_object = NULL;
5371 }
5372
5373 while (retval == 0) {
5374
5375 tq->pgo_throttled = TRUE;
5376
5377 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
5378
5379 vm_page_unlock_queues();
5380
5381 wait_result = thread_block(THREAD_CONTINUE_NULL);
5382
5383 vm_page_lock_queues();
5384
5385 if (wait_result != THREAD_TIMED_OUT)
5386 break;
5387 if (!VM_PAGE_Q_THROTTLED(tq))
5388 break;
5389
5390 if (hibernate_should_abort())
5391 retval = 1;
5392
5393 if (--wait_count == 0) {
5394
5395 hibernate_stats.hibernate_throttle_timeout++;
5396
5397 if (tq == eq) {
5398 hibernate_skip_external = TRUE;
5399 break;
5400 }
5401 retval = 1;
5402 }
5403 }
5404 if (retval)
5405 break;
5406
5407 hibernate_stats.hibernate_throttled++;
5408
5409 continue;
5410 }
5411 /*
5412 * we've already factored out pages in the laundry which
5413 * means this page can't be on the pageout queue so it's
5414 * safe to do the vm_page_queues_remove
5415 */
5416 assert(!m->pageout_queue);
5417
5418 vm_page_queues_remove(m);
5419
5420 if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
5421 pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
5422
5423 (void)vm_pageout_cluster(m, FALSE, FALSE, FALSE);
5424
5425 hibernate_stats.hibernate_found_dirty++;
5426
5427 goto next_pg;
5428
5429 reenter_pg_on_q:
5430 queue_remove(q, m, vm_page_t, pageq);
5431 queue_enter(q, m, vm_page_t, pageq);
5432
5433 hibernate_stats.hibernate_reentered_on_q++;
5434 next_pg:
5435 hibernate_stats.hibernate_considered++;
5436
5437 qcount--;
5438 try_failed_count = 0;
5439 }
5440 if (l_object != NULL) {
5441 vm_object_unlock(l_object);
5442 l_object = NULL;
5443 }
5444
5445 vm_page_unlock_queues();
5446
5447 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5448
5449 return (retval);
5450 }
5451
5452
5453 static int
5454 hibernate_flush_dirty_pages(int pass)
5455 {
5456 struct vm_speculative_age_q *aq;
5457 uint32_t i;
5458
5459 if (vm_page_local_q) {
5460 for (i = 0; i < vm_page_local_q_count; i++)
5461 vm_page_reactivate_local(i, TRUE, FALSE);
5462 }
5463
5464 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5465 int qcount;
5466 vm_page_t m;
5467
5468 aq = &vm_page_queue_speculative[i];
5469
5470 if (queue_empty(&aq->age_q))
5471 continue;
5472 qcount = 0;
5473
5474 vm_page_lockspin_queues();
5475
5476 queue_iterate(&aq->age_q,
5477 m,
5478 vm_page_t,
5479 pageq)
5480 {
5481 qcount++;
5482 }
5483 vm_page_unlock_queues();
5484
5485 if (qcount) {
5486 if (hibernate_flush_queue(&aq->age_q, qcount))
5487 return (1);
5488 }
5489 }
5490 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
5491 return (1);
5492 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5493 return (1);
5494 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
5495 return (1);
5496 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5497 return (1);
5498
5499 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5500 vm_compressor_record_warmup_start();
5501
5502 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5503 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5504 vm_compressor_record_warmup_end();
5505 return (1);
5506 }
5507 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5508 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5509 vm_compressor_record_warmup_end();
5510 return (1);
5511 }
5512 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5513 vm_compressor_record_warmup_end();
5514
5515 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5516 return (1);
5517
5518 return (0);
5519 }
5520
5521
5522 void
5523 hibernate_reset_stats()
5524 {
5525 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5526 }
5527
5528
5529 int
5530 hibernate_flush_memory()
5531 {
5532 int retval;
5533
5534 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5535
5536 hibernate_cleaning_in_progress = TRUE;
5537 hibernate_skip_external = FALSE;
5538
5539 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5540
5541 if (COMPRESSED_PAGER_IS_ACTIVE) {
5542
5543 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5544
5545 vm_compressor_flush();
5546
5547 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5548 }
5549 if (consider_buffer_cache_collect != NULL) {
5550 unsigned int orig_wire_count;
5551
5552 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5553 orig_wire_count = vm_page_wire_count;
5554
5555 (void)(*consider_buffer_cache_collect)(1);
5556 consider_zone_gc(TRUE);
5557
5558 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5559
5560 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
5561 }
5562 }
5563 hibernate_cleaning_in_progress = FALSE;
5564
5565 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5566
5567 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5568 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5569
5570
5571 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5572 hibernate_stats.hibernate_considered,
5573 hibernate_stats.hibernate_reentered_on_q,
5574 hibernate_stats.hibernate_found_dirty);
5575 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
5576 hibernate_stats.hibernate_skipped_cleaning,
5577 hibernate_stats.hibernate_skipped_transient,
5578 hibernate_stats.hibernate_skipped_precious,
5579 hibernate_stats.hibernate_skipped_external,
5580 hibernate_stats.hibernate_queue_nolock);
5581 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5582 hibernate_stats.hibernate_queue_paused,
5583 hibernate_stats.hibernate_throttled,
5584 hibernate_stats.hibernate_throttle_timeout,
5585 hibernate_stats.hibernate_drained,
5586 hibernate_stats.hibernate_drain_timeout);
5587
5588 return (retval);
5589 }
5590
5591
5592 static void
5593 hibernate_page_list_zero(hibernate_page_list_t *list)
5594 {
5595 uint32_t bank;
5596 hibernate_bitmap_t * bitmap;
5597
5598 bitmap = &list->bank_bitmap[0];
5599 for (bank = 0; bank < list->bank_count; bank++)
5600 {
5601 uint32_t last_bit;
5602
5603 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5604 // set out-of-bound bits at end of bitmap.
5605 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5606 if (last_bit)
5607 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
5608
5609 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5610 }
5611 }
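/*
 * Illustrative worked example of the out-of-bound marking above, assuming
 * the most-significant-bit-first page ordering implied by the right shift:
 * for a bank spanning pages 100..136 (37 pages), last_bit = 37 & 31 = 5,
 * so the final bitmap word becomes 0xFFFFFFFF >> 5 == 0x07FFFFFF.  The 5
 * high-order bits still track real pages and stay clear (zero == "needs
 * to be saved"), while the remaining 27 out-of-bound bits are set so they
 * are never treated as pages that need saving.
 */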
5612
5613 void
5614 hibernate_free_gobble_pages(void)
5615 {
5616 vm_page_t m, next;
5617 uint32_t count = 0;
5618
5619 m = (vm_page_t) hibernate_gobble_queue;
5620 while(m)
5621 {
5622 next = (vm_page_t) m->pageq.next;
5623 vm_page_free(m);
5624 count++;
5625 m = next;
5626 }
5627 hibernate_gobble_queue = VM_PAGE_NULL;
5628
5629 if (count)
5630 HIBLOG("Freed %d pages\n", count);
5631 }
5632
5633 static boolean_t
5634 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
5635 {
5636 vm_object_t object = NULL;
5637 int refmod_state;
5638 boolean_t discard = FALSE;
5639
5640 do
5641 {
5642 if (m->private)
5643 panic("hibernate_consider_discard: private");
5644
5645 if (!vm_object_lock_try(m->object)) {
5646 if (!preflight) hibernate_stats.cd_lock_failed++;
5647 break;
5648 }
5649 object = m->object;
5650
5651 if (VM_PAGE_WIRED(m)) {
5652 if (!preflight) hibernate_stats.cd_found_wired++;
5653 break;
5654 }
5655 if (m->precious) {
5656 if (!preflight) hibernate_stats.cd_found_precious++;
5657 break;
5658 }
5659 if (m->busy || !object->alive) {
5660 /*
5661 * Somebody is playing with this page.
5662 */
5663 if (!preflight) hibernate_stats.cd_found_busy++;
5664 break;
5665 }
5666 if (m->absent || m->unusual || m->error) {
5667 /*
5668 * If it's unusual in any way, ignore it
5669 */
5670 if (!preflight) hibernate_stats.cd_found_unusual++;
5671 break;
5672 }
5673 if (m->cleaning) {
5674 if (!preflight) hibernate_stats.cd_found_cleaning++;
5675 break;
5676 }
5677 if (m->laundry) {
5678 if (!preflight) hibernate_stats.cd_found_laundry++;
5679 break;
5680 }
5681 if (!m->dirty)
5682 {
5683 refmod_state = pmap_get_refmod(m->phys_page);
5684
5685 if (refmod_state & VM_MEM_REFERENCED)
5686 m->reference = TRUE;
5687 if (refmod_state & VM_MEM_MODIFIED) {
5688 SET_PAGE_DIRTY(m, FALSE);
5689 }
5690 }
5691
5692 /*
5693 * If it's clean or purgeable we can discard the page on wakeup.
5694 */
5695 discard = (!m->dirty)
5696 || (VM_PURGABLE_VOLATILE == object->purgable)
5697 || (VM_PURGABLE_EMPTY == object->purgable);
5698
5699
5700 if (discard == FALSE) {
5701 if (!preflight)
5702 hibernate_stats.cd_found_dirty++;
5703 } else if (m->xpmapped && m->reference && !object->internal) {
5704 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
5705 if (!preflight)
5706 hibernate_stats.cd_found_xpmapped++;
5707 discard = FALSE;
5708 } else {
5709 if (!preflight)
5710 hibernate_stats.cd_skipped_xpmapped++;
5711 }
5712 }
5713 }
5714 while (FALSE);
5715
5716 if (object)
5717 vm_object_unlock(object);
5718
5719 return (discard);
5720 }
5721
5722
5723 static void
5724 hibernate_discard_page(vm_page_t m)
5725 {
5726 if (m->absent || m->unusual || m->error)
5727 /*
5728 * If it's unusual in any way, ignore
5729 */
5730 return;
5731
5732 #if MACH_ASSERT || DEBUG
5733 vm_object_t object = m->object;
5734 if (!vm_object_lock_try(m->object))
5735 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5736 #else
5737 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5738 makes sure these locks are uncontended before sleep */
5739 #endif /* MACH_ASSERT || DEBUG */
5740
5741 if (m->pmapped == TRUE)
5742 {
5743 __unused int refmod_state = pmap_disconnect(m->phys_page);
5744 }
5745
5746 if (m->laundry)
5747 panic("hibernate_discard_page(%p) laundry", m);
5748 if (m->private)
5749 panic("hibernate_discard_page(%p) private", m);
5750 if (m->fictitious)
5751 panic("hibernate_discard_page(%p) fictitious", m);
5752
5753 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5754 {
5755 /* object should be on a queue */
5756 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5757 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5758 assert(old_queue);
5759 if (m->object->purgeable_when_ripe) {
5760 vm_purgeable_token_delete_first(old_queue);
5761 }
5762 m->object->purgable = VM_PURGABLE_EMPTY;
5763
5764 /*
5765 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
5766 * accounted in the "volatile" ledger, so no change here.
5767 * We have to update vm_page_purgeable_count, though, since we're
5768 * effectively purging this object.
5769 */
5770 unsigned int delta;
5771 assert(m->object->resident_page_count >= m->object->wired_page_count);
5772 delta = (m->object->resident_page_count - m->object->wired_page_count);
5773 assert(vm_page_purgeable_count >= delta);
5774 assert(delta > 0);
5775 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
5776 }
5777
5778 vm_page_free(m);
5779
5780 #if MACH_ASSERT || DEBUG
5781 vm_object_unlock(object);
5782 #endif /* MACH_ASSERT || DEBUG */
5783 }
5784
5785 /*
5786 Grab locks for hibernate_page_list_setall()
5787 */
5788 void
5789 hibernate_vm_lock_queues(void)
5790 {
5791 vm_object_lock(compressor_object);
5792 vm_page_lock_queues();
5793 lck_mtx_lock(&vm_page_queue_free_lock);
5794
5795 if (vm_page_local_q) {
5796 uint32_t i;
5797 for (i = 0; i < vm_page_local_q_count; i++) {
5798 struct vpl *lq;
5799 lq = &vm_page_local_q[i].vpl_un.vpl;
5800 VPL_LOCK(&lq->vpl_lock);
5801 }
5802 }
5803 }
5804
5805 void
5806 hibernate_vm_unlock_queues(void)
5807 {
5808 if (vm_page_local_q) {
5809 uint32_t i;
5810 for (i = 0; i < vm_page_local_q_count; i++) {
5811 struct vpl *lq;
5812 lq = &vm_page_local_q[i].vpl_un.vpl;
5813 VPL_UNLOCK(&lq->vpl_lock);
5814 }
5815 }
5816 lck_mtx_unlock(&vm_page_queue_free_lock);
5817 vm_page_unlock_queues();
5818 vm_object_unlock(compressor_object);
5819 }
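
/*
 * Usage sketch (illustrative, not part of the original source): these two
 * helpers are the bracketing pair the comment above describes.  They take
 * the compressor object lock, the page-queue lock, the free-page lock and
 * each per-CPU local queue lock, and hibernate_vm_unlock_queues() drops
 * them in the reverse order.
 *
 *	hibernate_vm_lock_queues();
 *	... build the hibernation page lists ...
 *	hibernate_vm_unlock_queues();
 */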
5820
5821 /*
5822 A zero bit in the bitmaps => the page needs to be saved. All pages default to being saved;
5823 pages known to the VM to not need saving are subtracted.
5824 Wired pages to be saved are present in page_list_wired, pageable pages in page_list.
5825 */
5826
5827 void
5828 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5829 hibernate_page_list_t * page_list_wired,
5830 hibernate_page_list_t * page_list_pal,
5831 boolean_t preflight,
5832 boolean_t will_discard,
5833 uint32_t * pagesOut)
5834 {
5835 uint64_t start, end, nsec;
5836 vm_page_t m;
5837 vm_page_t next;
5838 uint32_t pages = page_list->page_count;
5839 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
5840 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5841 uint32_t count_wire = pages;
5842 uint32_t count_discard_active = 0;
5843 uint32_t count_discard_inactive = 0;
5844 uint32_t count_discard_cleaned = 0;
5845 uint32_t count_discard_purgeable = 0;
5846 uint32_t count_discard_speculative = 0;
5847 uint32_t count_discard_vm_struct_pages = 0;
5848 uint32_t i;
5849 uint32_t bank;
5850 hibernate_bitmap_t * bitmap;
5851 hibernate_bitmap_t * bitmap_wired;
5852 boolean_t discard_all;
5853 boolean_t discard;
5854
5855 HIBLOG("hibernate_page_list_setall(preflight %d) start\n", preflight);
5856
5857 if (preflight) {
5858 page_list = NULL;
5859 page_list_wired = NULL;
5860 page_list_pal = NULL;
5861 discard_all = FALSE;
5862 } else {
5863 discard_all = will_discard;
5864 }
5865
5866 #if MACH_ASSERT || DEBUG
5867 if (!preflight)
5868 {
5869 vm_page_lock_queues();
5870 if (vm_page_local_q) {
5871 for (i = 0; i < vm_page_local_q_count; i++) {
5872 struct vpl *lq;
5873 lq = &vm_page_local_q[i].vpl_un.vpl;
5874 VPL_LOCK(&lq->vpl_lock);
5875 }
5876 }
5877 }
5878 #endif /* MACH_ASSERT || DEBUG */
5879
5880
5881 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5882
5883 clock_get_uptime(&start);
5884
5885 if (!preflight) {
5886 hibernate_page_list_zero(page_list);
5887 hibernate_page_list_zero(page_list_wired);
5888 hibernate_page_list_zero(page_list_pal);
5889
5890 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5891 hibernate_stats.cd_pages = pages;
5892 }
5893
5894 if (vm_page_local_q) {
5895 for (i = 0; i < vm_page_local_q_count; i++)
5896 vm_page_reactivate_local(i, TRUE, !preflight);
5897 }
5898
5899 if (preflight) {
5900 vm_object_lock(compressor_object);
5901 vm_page_lock_queues();
5902 lck_mtx_lock(&vm_page_queue_free_lock);
5903 }
5904
5905 m = (vm_page_t) hibernate_gobble_queue;
5906 while (m)
5907 {
5908 pages--;
5909 count_wire--;
5910 if (!preflight) {
5911 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5912 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5913 }
5914 m = (vm_page_t) m->pageq.next;
5915 }
5916
5917 if (!preflight) for( i = 0; i < real_ncpus; i++ )
5918 {
5919 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5920 {
5921 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5922 {
5923 pages--;
5924 count_wire--;
5925 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5926 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5927
5928 hibernate_stats.cd_local_free++;
5929 hibernate_stats.cd_total_free++;
5930 }
5931 }
5932 }
5933
5934 for( i = 0; i < vm_colors; i++ )
5935 {
5936 queue_iterate(&vm_page_queue_free[i],
5937 m,
5938 vm_page_t,
5939 pageq)
5940 {
5941 pages--;
5942 count_wire--;
5943 if (!preflight) {
5944 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5945 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5946
5947 hibernate_stats.cd_total_free++;
5948 }
5949 }
5950 }
5951
5952 queue_iterate(&vm_lopage_queue_free,
5953 m,
5954 vm_page_t,
5955 pageq)
5956 {
5957 pages--;
5958 count_wire--;
5959 if (!preflight) {
5960 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5961 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5962
5963 hibernate_stats.cd_total_free++;
5964 }
5965 }
5966
5967 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5968 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
5969 {
5970 next = (vm_page_t) m->pageq.next;
5971 discard = FALSE;
5972 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5973 && hibernate_consider_discard(m, preflight))
5974 {
5975 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5976 count_discard_inactive++;
5977 discard = discard_all;
5978 }
5979 else
5980 count_throttled++;
5981 count_wire--;
5982 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5983
5984 if (discard) hibernate_discard_page(m);
5985 m = next;
5986 }
5987
5988 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5989 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5990 {
5991 next = (vm_page_t) m->pageq.next;
5992 discard = FALSE;
5993 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5994 && hibernate_consider_discard(m, preflight))
5995 {
5996 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5997 if (m->dirty)
5998 count_discard_purgeable++;
5999 else
6000 count_discard_inactive++;
6001 discard = discard_all;
6002 }
6003 else
6004 count_anonymous++;
6005 count_wire--;
6006 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6007 if (discard) hibernate_discard_page(m);
6008 m = next;
6009 }
6010
6011 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
6012 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
6013 {
6014 next = (vm_page_t) m->pageq.next;
6015 discard = FALSE;
6016 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6017 && hibernate_consider_discard(m, preflight))
6018 {
6019 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
6020 if (m->dirty)
6021 count_discard_purgeable++;
6022 else
6023 count_discard_cleaned++;
6024 discard = discard_all;
6025 }
6026 else
6027 count_cleaned++;
6028 count_wire--;
6029 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6030 if (discard) hibernate_discard_page(m);
6031 m = next;
6032 }
6033
6034 m = (vm_page_t) queue_first(&vm_page_queue_active);
6035 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
6036 {
6037 next = (vm_page_t) m->pageq.next;
6038 discard = FALSE;
6039 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
6040 && hibernate_consider_discard(m, preflight))
6041 {
6042 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
6043 if (m->dirty)
6044 count_discard_purgeable++;
6045 else
6046 count_discard_active++;
6047 discard = discard_all;
6048 }
6049 else
6050 count_active++;
6051 count_wire--;
6052 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6053 if (discard) hibernate_discard_page(m);
6054 m = next;
6055 }
6056
6057 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6058 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
6059 {
6060 next = (vm_page_t) m->pageq.next;
6061 discard = FALSE;
6062 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6063 && hibernate_consider_discard(m, preflight))
6064 {
6065 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
6066 if (m->dirty)
6067 count_discard_purgeable++;
6068 else
6069 count_discard_inactive++;
6070 discard = discard_all;
6071 }
6072 else
6073 count_inactive++;
6074 count_wire--;
6075 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6076 if (discard) hibernate_discard_page(m);
6077 m = next;
6078 }
6079
6080 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6081 {
6082 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
6083 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
6084 {
6085 next = (vm_page_t) m->pageq.next;
6086 discard = FALSE;
6087 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
6088 && hibernate_consider_discard(m, preflight))
6089 {
6090 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
6091 count_discard_speculative++;
6092 discard = discard_all;
6093 }
6094 else
6095 count_speculative++;
6096 count_wire--;
6097 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6098 if (discard) hibernate_discard_page(m);
6099 m = next;
6100 }
6101 }
6102
6103 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
6104 {
6105 count_compressor++;
6106 count_wire--;
6107 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
6108 }
6109
6110 if (preflight == FALSE && discard_all == TRUE) {
6111 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6112
6113 HIBLOG("hibernate_teardown started\n");
6114 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
6115 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
6116
6117 pages -= count_discard_vm_struct_pages;
6118 count_wire -= count_discard_vm_struct_pages;
6119
6120 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
6121
6122 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6123 }
6124
6125 if (!preflight) {
6126 // pull wired from hibernate_bitmap: pages that must be saved as wired get marked as not needed in page_list
6127 bitmap = &page_list->bank_bitmap[0];
6128 bitmap_wired = &page_list_wired->bank_bitmap[0];
6129 for (bank = 0; bank < page_list->bank_count; bank++)
6130 {
6131 for (i = 0; i < bitmap->bitmapwords; i++)
6132 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
6133 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
6134 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
6135 }
6136 }
6137
6138 // machine dependent adjustments
6139 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
6140
6141 if (!preflight) {
6142 hibernate_stats.cd_count_wire = count_wire;
6143 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
6144 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
6145 }
6146
6147 clock_get_uptime(&end);
6148 absolutetime_to_nanoseconds(end - start, &nsec);
6149 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
6150
6151 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
6152 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
6153 discard_all ? "did" : "could",
6154 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6155
6156 if (hibernate_stats.cd_skipped_xpmapped)
6157 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
6158
6159 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
6160
6161 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
6162
6163 #if MACH_ASSERT || DEBUG
6164 if (!preflight)
6165 {
6166 if (vm_page_local_q) {
6167 for (i = 0; i < vm_page_local_q_count; i++) {
6168 struct vpl *lq;
6169 lq = &vm_page_local_q[i].vpl_un.vpl;
6170 VPL_UNLOCK(&lq->vpl_lock);
6171 }
6172 }
6173 vm_page_unlock_queues();
6174 }
6175 #endif /* MACH_ASSERT || DEBUG */
6176
6177 if (preflight) {
6178 lck_mtx_unlock(&vm_page_queue_free_lock);
6179 vm_page_unlock_queues();
6180 vm_object_unlock(compressor_object);
6181 }
6182
6183 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
6184 }
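
/*
 * Sketch of how the bitmap built above is consumed (illustrative; the real
 * call sites live in the hibernation driver, outside this file): with
 * will_discard FALSE the pass above only marks discardable pages in
 * page_list ("could" discard in the log line); hibernate_page_list_discard()
 * below is what actually frees them later, by re-walking the pageable
 * queues and testing the same bit:
 *
 *	if (hibernate_page_bittst(page_list, m->phys_page))
 *		hibernate_discard_page(m);
 */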
6185
6186 void
6187 hibernate_page_list_discard(hibernate_page_list_t * page_list)
6188 {
6189 uint64_t start, end, nsec;
6190 vm_page_t m;
6191 vm_page_t next;
6192 uint32_t i;
6193 uint32_t count_discard_active = 0;
6194 uint32_t count_discard_inactive = 0;
6195 uint32_t count_discard_purgeable = 0;
6196 uint32_t count_discard_cleaned = 0;
6197 uint32_t count_discard_speculative = 0;
6198
6199
6200 #if MACH_ASSERT || DEBUG
6201 vm_page_lock_queues();
6202 if (vm_page_local_q) {
6203 for (i = 0; i < vm_page_local_q_count; i++) {
6204 struct vpl *lq;
6205 lq = &vm_page_local_q[i].vpl_un.vpl;
6206 VPL_LOCK(&lq->vpl_lock);
6207 }
6208 }
6209 #endif /* MACH_ASSERT || DEBUG */
6210
6211 clock_get_uptime(&start);
6212
6213 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
6214 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
6215 {
6216 next = (vm_page_t) m->pageq.next;
6217 if (hibernate_page_bittst(page_list, m->phys_page))
6218 {
6219 if (m->dirty)
6220 count_discard_purgeable++;
6221 else
6222 count_discard_inactive++;
6223 hibernate_discard_page(m);
6224 }
6225 m = next;
6226 }
6227
6228 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6229 {
6230 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
6231 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
6232 {
6233 next = (vm_page_t) m->pageq.next;
6234 if (hibernate_page_bittst(page_list, m->phys_page))
6235 {
6236 count_discard_speculative++;
6237 hibernate_discard_page(m);
6238 }
6239 m = next;
6240 }
6241 }
6242
6243 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6244 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
6245 {
6246 next = (vm_page_t) m->pageq.next;
6247 if (hibernate_page_bittst(page_list, m->phys_page))
6248 {
6249 if (m->dirty)
6250 count_discard_purgeable++;
6251 else
6252 count_discard_inactive++;
6253 hibernate_discard_page(m);
6254 }
6255 m = next;
6256 }
6257
6258 m = (vm_page_t) queue_first(&vm_page_queue_active);
6259 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
6260 {
6261 next = (vm_page_t) m->pageq.next;
6262 if (hibernate_page_bittst(page_list, m->phys_page))
6263 {
6264 if (m->dirty)
6265 count_discard_purgeable++;
6266 else
6267 count_discard_active++;
6268 hibernate_discard_page(m);
6269 }
6270 m = next;
6271 }
6272
6273 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
6274 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
6275 {
6276 next = (vm_page_t) m->pageq.next;
6277 if (hibernate_page_bittst(page_list, m->phys_page))
6278 {
6279 if (m->dirty)
6280 count_discard_purgeable++;
6281 else
6282 count_discard_cleaned++;
6283 hibernate_discard_page(m);
6284 }
6285 m = next;
6286 }
6287
6288 #if MACH_ASSERT || DEBUG
6289 if (vm_page_local_q) {
6290 for (i = 0; i < vm_page_local_q_count; i++) {
6291 struct vpl *lq;
6292 lq = &vm_page_local_q[i].vpl_un.vpl;
6293 VPL_UNLOCK(&lq->vpl_lock);
6294 }
6295 }
6296 vm_page_unlock_queues();
6297 #endif /* MACH_ASSERT || DEBUG */
6298
6299 clock_get_uptime(&end);
6300 absolutetime_to_nanoseconds(end - start, &nsec);
6301 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
6302 nsec / 1000000ULL,
6303 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6304 }
6305
6306 boolean_t hibernate_paddr_map_inited = FALSE;
6307 boolean_t hibernate_rebuild_needed = FALSE;
6308 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
6309 vm_page_t hibernate_rebuild_hash_list = NULL;
6310
6311 unsigned int hibernate_teardown_found_tabled_pages = 0;
6312 unsigned int hibernate_teardown_found_created_pages = 0;
6313 unsigned int hibernate_teardown_found_free_pages = 0;
6314 unsigned int hibernate_teardown_vm_page_free_count;
6315
6316
6317 struct ppnum_mapping {
6318 struct ppnum_mapping *ppnm_next;
6319 ppnum_t ppnm_base_paddr;
6320 unsigned int ppnm_sindx;
6321 unsigned int ppnm_eindx;
6322 };
6323
6324 struct ppnum_mapping *ppnm_head;
6325 struct ppnum_mapping *ppnm_last_found = NULL;
6326
6327
6328 void
6329 hibernate_create_paddr_map()
6330 {
6331 unsigned int i;
6332 ppnum_t next_ppnum_in_run = 0;
6333 struct ppnum_mapping *ppnm = NULL;
6334
6335 if (hibernate_paddr_map_inited == FALSE) {
6336
6337 for (i = 0; i < vm_pages_count; i++) {
6338
6339 if (ppnm)
6340 ppnm->ppnm_eindx = i;
6341
6342 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
6343
6344 ppnm = kalloc(sizeof(struct ppnum_mapping));
6345
6346 ppnm->ppnm_next = ppnm_head;
6347 ppnm_head = ppnm;
6348
6349 ppnm->ppnm_sindx = i;
6350 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
6351 }
6352 next_ppnum_in_run = vm_pages[i].phys_page + 1;
6353 }
6354 ppnm->ppnm_eindx++;
6355
6356 hibernate_paddr_map_inited = TRUE;
6357 }
6358 }
6359
6360 ppnum_t
6361 hibernate_lookup_paddr(unsigned int indx)
6362 {
6363 struct ppnum_mapping *ppnm = NULL;
6364
6365 ppnm = ppnm_last_found;
6366
6367 if (ppnm) {
6368 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
6369 goto done;
6370 }
6371 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
6372
6373 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
6374 ppnm_last_found = ppnm;
6375 break;
6376 }
6377 }
6378 if (ppnm == NULL)
6379 panic("hibernate_lookup_paddr of %d failed\n", indx);
6380 done:
6381 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
6382 }
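
/*
 * Worked example (hypothetical numbers): if hibernate_create_paddr_map()
 * recorded a run with ppnm_sindx == 100, ppnm_eindx == 164 and
 * ppnm_base_paddr == 0x2000, that run covers the 64 physically contiguous
 * pages behind vm_pages[100..163], and
 *
 *	hibernate_lookup_paddr(130) == 0x2000 + (130 - 100) == 0x201e
 */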
6383
6384
6385 uint32_t
6386 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6387 {
6388 addr64_t saddr_aligned;
6389 addr64_t eaddr_aligned;
6390 addr64_t addr;
6391 ppnum_t paddr;
6392 unsigned int mark_as_unneeded_pages = 0;
6393
6394 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
6395 eaddr_aligned = eaddr & ~PAGE_MASK_64;
6396
6397 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
6398
6399 paddr = pmap_find_phys(kernel_pmap, addr);
6400
6401 assert(paddr);
6402
6403 hibernate_page_bitset(page_list, TRUE, paddr);
6404 hibernate_page_bitset(page_list_wired, TRUE, paddr);
6405
6406 mark_as_unneeded_pages++;
6407 }
6408 return (mark_as_unneeded_pages);
6409 }
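
/*
 * Worked example (hypothetical addresses, assuming 4KB pages): with
 * saddr == 0x1040 and eaddr == 0x4008, saddr_aligned rounds up to 0x2000
 * and eaddr_aligned rounds down to 0x4000, so only the two pages wholly
 * inside the range (the ones mapped at 0x2000 and 0x3000) are marked
 * unneeded and the function returns 2; partial pages at either end are
 * left to be saved.
 */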
6410
6411
6412 void
6413 hibernate_hash_insert_page(vm_page_t mem)
6414 {
6415 vm_page_bucket_t *bucket;
6416 int hash_id;
6417
6418 assert(mem->hashed);
6419 assert(mem->object);
6420 assert(mem->offset != (vm_object_offset_t) -1);
6421
6422 /*
6423 * Insert it into the object/offset hash table
6424 */
6425 hash_id = vm_page_hash(mem->object, mem->offset);
6426 bucket = &vm_page_buckets[hash_id];
6427
6428 mem->next_m = bucket->page_list;
6429 bucket->page_list = VM_PAGE_PACK_PTR(mem);
6430 }
6431
6432
6433 void
6434 hibernate_free_range(int sindx, int eindx)
6435 {
6436 vm_page_t mem;
6437 unsigned int color;
6438
6439 while (sindx < eindx) {
6440 mem = &vm_pages[sindx];
6441
6442 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6443
6444 mem->lopage = FALSE;
6445 mem->free = TRUE;
6446
6447 color = mem->phys_page & vm_color_mask;
6448 queue_enter_first(&vm_page_queue_free[color],
6449 mem,
6450 vm_page_t,
6451 pageq);
6452 vm_page_free_count++;
6453
6454 sindx++;
6455 }
6456 }
6457
6458
6459 extern void hibernate_rebuild_pmap_structs(void);
6460
6461 void
6462 hibernate_rebuild_vm_structs(void)
6463 {
6464 int cindx, sindx, eindx;
6465 vm_page_t mem, tmem, mem_next;
6466 AbsoluteTime startTime, endTime;
6467 uint64_t nsec;
6468
6469 if (hibernate_rebuild_needed == FALSE)
6470 return;
6471
6472 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6473 HIBLOG("hibernate_rebuild started\n");
6474
6475 clock_get_uptime(&startTime);
6476
6477 hibernate_rebuild_pmap_structs();
6478
6479 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6480 eindx = vm_pages_count;
6481
6482 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6483
6484 mem = &vm_pages[cindx];
6485 /*
6486 * hibernate_teardown_vm_structs leaves the location where
6487 * this vm_page_t must be located in "next_m".
6488 */
6489 tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
6490 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6491
6492 sindx = (int)(tmem - &vm_pages[0]);
6493
6494 if (mem != tmem) {
6495 /*
6496 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6497 * so move it back to its real location
6498 */
6499 *tmem = *mem;
6500 mem = tmem;
6501 }
6502 if (mem->hashed)
6503 hibernate_hash_insert_page(mem);
6504 /*
6505 * the 'hole' between this vm_page_t and the previous
6506 * vm_page_t we moved needs to be initialized as
6507 * a range of free vm_page_t's
6508 */
6509 hibernate_free_range(sindx + 1, eindx);
6510
6511 eindx = sindx;
6512 }
6513 if (sindx)
6514 hibernate_free_range(0, sindx);
6515
6516 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6517
6518 /*
6519 * process the list of vm_page_t's that were entered in the hash,
6520 * but were not located in the vm_pages array... these are
6521 * vm_page_t's that were created on the fly (i.e. fictitious)
6522 */
6523 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6524 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6525
6526 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6527 hibernate_hash_insert_page(mem);
6528 }
6529 hibernate_rebuild_hash_list = NULL;
6530
6531 clock_get_uptime(&endTime);
6532 SUB_ABSOLUTETIME(&endTime, &startTime);
6533 absolutetime_to_nanoseconds(endTime, &nsec);
6534
6535 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6536
6537 hibernate_rebuild_needed = FALSE;
6538
6539 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6540 }
6541
6542
6543 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6544
6545 uint32_t
6546 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6547 {
6548 unsigned int i;
6549 unsigned int compact_target_indx;
6550 vm_page_t mem, mem_next;
6551 vm_page_bucket_t *bucket;
6552 unsigned int mark_as_unneeded_pages = 0;
6553 unsigned int unneeded_vm_page_bucket_pages = 0;
6554 unsigned int unneeded_vm_pages_pages = 0;
6555 unsigned int unneeded_pmap_pages = 0;
6556 addr64_t start_of_unneeded = 0;
6557 addr64_t end_of_unneeded = 0;
6558
6559
6560 if (hibernate_should_abort())
6561 return (0);
6562
6563 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6564 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6565 vm_page_cleaned_count, compressor_object->resident_page_count);
6566
6567 for (i = 0; i < vm_page_bucket_count; i++) {
6568
6569 bucket = &vm_page_buckets[i];
6570
6571 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
6572 assert(mem->hashed);
6573
6574 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6575
6576 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6577 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
6578 hibernate_rebuild_hash_list = mem;
6579 }
6580 }
6581 }
6582 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6583 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6584
6585 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6586
6587 compact_target_indx = 0;
6588
6589 for (i = 0; i < vm_pages_count; i++) {
6590
6591 mem = &vm_pages[i];
6592
6593 if (mem->free) {
6594 unsigned int color;
6595
6596 assert(mem->busy);
6597 assert(!mem->lopage);
6598
6599 color = mem->phys_page & vm_color_mask;
6600
6601 queue_remove(&vm_page_queue_free[color],
6602 mem,
6603 vm_page_t,
6604 pageq);
6605 mem->pageq.next = NULL;
6606 mem->pageq.prev = NULL;
6607
6608 vm_page_free_count--;
6609
6610 hibernate_teardown_found_free_pages++;
6611
6612 if ( !vm_pages[compact_target_indx].free)
6613 compact_target_indx = i;
6614 } else {
6615 /*
6616 * record this vm_page_t's original location;
6617 * we need this even if it doesn't get moved,
6618 * as an indicator to the rebuild function that
6619 * it doesn't have to be moved back
6620 */
6621 mem->next_m = VM_PAGE_PACK_PTR(mem);
6622
6623 if (vm_pages[compact_target_indx].free) {
6624 /*
6625 * we've got a hole to fill, so
6626 * move this vm_page_t to its new home
6627 */
6628 vm_pages[compact_target_indx] = *mem;
6629 mem->free = TRUE;
6630
6631 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6632 compact_target_indx++;
6633 } else
6634 hibernate_teardown_last_valid_compact_indx = i;
6635 }
6636 }
6637 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6638 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6639 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6640
6641 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6642
6643 if (start_of_unneeded) {
6644 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6645 mark_as_unneeded_pages += unneeded_pmap_pages;
6646 }
6647 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6648
6649 hibernate_rebuild_needed = TRUE;
6650
6651 return (mark_as_unneeded_pages);
6652 }
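
/*
 * Compaction sketch (illustrative, hypothetical indices): suppose
 * vm_pages[7] is free and vm_pages[9] is in use.  The loop above records
 * the original slot in the page itself (next_m packs &vm_pages[9]), copies
 * the entry into the hole (vm_pages[7] = vm_pages[9]), marks the old slot
 * free and sets hibernate_teardown_last_valid_compact_indx to 7.  On wake,
 * hibernate_rebuild_vm_structs() walks the compacted prefix backwards,
 * unpacks next_m to find the original slot, copies the entry back, and
 * re-creates the intervening free pages with hibernate_free_range().
 */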
6653
6654
6655 #endif /* HIBERNATION */
6656
6657 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6658
6659 #include <mach_vm_debug.h>
6660 #if MACH_VM_DEBUG
6661
6662 #include <mach_debug/hash_info.h>
6663 #include <vm/vm_debug.h>
6664
6665 /*
6666 * Routine: vm_page_info
6667 * Purpose:
6668 * Return information about the global VP table.
6669 * Fills the buffer with as much information as possible
6670 * and returns the desired size of the buffer.
6671 * Conditions:
6672 * Nothing locked. The caller should provide
6673 * possibly-pageable memory.
6674 */
6675
6676 unsigned int
6677 vm_page_info(
6678 hash_info_bucket_t *info,
6679 unsigned int count)
6680 {
6681 unsigned int i;
6682 lck_spin_t *bucket_lock;
6683
6684 if (vm_page_bucket_count < count)
6685 count = vm_page_bucket_count;
6686
6687 for (i = 0; i < count; i++) {
6688 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6689 unsigned int bucket_count = 0;
6690 vm_page_t m;
6691
6692 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6693 lck_spin_lock(bucket_lock);
6694
6695 for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
6696 bucket_count++;
6697
6698 lck_spin_unlock(bucket_lock);
6699
6700 /* don't touch pageable memory while holding locks */
6701 info[i].hib_count = bucket_count;
6702 }
6703
6704 return vm_page_bucket_count;
6705 }
6706 #endif /* MACH_VM_DEBUG */
6707
6708 #if VM_PAGE_BUCKETS_CHECK
6709 void
6710 vm_page_buckets_check(void)
6711 {
6712 unsigned int i;
6713 vm_page_t p;
6714 unsigned int p_hash;
6715 vm_page_bucket_t *bucket;
6716 lck_spin_t *bucket_lock;
6717
6718 if (!vm_page_buckets_check_ready) {
6719 return;
6720 }
6721
6722 #if HIBERNATION
6723 if (hibernate_rebuild_needed ||
6724 hibernate_rebuild_hash_list) {
6725 panic("BUCKET_CHECK: hibernation in progress: "
6726 "rebuild_needed=%d rebuild_hash_list=%p\n",
6727 hibernate_rebuild_needed,
6728 hibernate_rebuild_hash_list);
6729 }
6730 #endif /* HIBERNATION */
6731
6732 #if VM_PAGE_FAKE_BUCKETS
6733 char *cp;
6734 for (cp = (char *) vm_page_fake_buckets_start;
6735 cp < (char *) vm_page_fake_buckets_end;
6736 cp++) {
6737 if (*cp != 0x5a) {
6738 panic("BUCKET_CHECK: corruption at %p in fake buckets "
6739 "[0x%llx:0x%llx]\n",
6740 cp,
6741 (uint64_t) vm_page_fake_buckets_start,
6742 (uint64_t) vm_page_fake_buckets_end);
6743 }
6744 }
6745 #endif /* VM_PAGE_FAKE_BUCKETS */
6746
6747 for (i = 0; i < vm_page_bucket_count; i++) {
6748 bucket = &vm_page_buckets[i];
6749 if (!bucket->page_list) {
6750 continue;
6751 }
6752
6753 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6754 lck_spin_lock(bucket_lock);
6755 p = VM_PAGE_UNPACK_PTR(bucket->page_list);
6756 while (p != VM_PAGE_NULL) {
6757 if (!p->hashed) {
6758 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
6759 "hash %d in bucket %d at %p "
6760 "is not hashed\n",
6761 p, p->object, p->offset,
6762 p_hash, i, bucket);
6763 }
6764 p_hash = vm_page_hash(p->object, p->offset);
6765 if (p_hash != i) {
6766 panic("BUCKET_CHECK: corruption in bucket %d "
6767 "at %p: page %p object %p offset 0x%llx "
6768 "hash %d\n",
6769 i, bucket, p, p->object, p->offset,
6770 p_hash);
6771 }
6772 p = VM_PAGE_UNPACK_PTR(p->next_m);
6773 }
6774 lck_spin_unlock(bucket_lock);
6775 }
6776
6777 // printf("BUCKET_CHECK: checked buckets\n");
6778 }
6779 #endif /* VM_PAGE_BUCKETS_CHECK */
6780
6781 /*
6782 * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
6783 * local queues if they exist... it's the only spot in the system where we add pages
6784 * to those queues... once on those queues, those pages can only move to one of the
6785 * global page queues or the free queues... they NEVER move from local q to local q.
6786 * the 'local' state is stable when vm_page_queues_remove is called since we're behind
6787 * the global vm_page_queue_lock at this point... we still need to take the local lock
6788 * in case this operation is being run on a different CPU than the local queue's identity,
6789 * but we don't have to worry about the page moving to a global queue or becoming wired
6790 * while we're grabbing the local lock since those operations would require the global
6791 * vm_page_queue_lock to be held, and we already own it.
6792 *
6793 * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
6794 * 'wired' and local are ALWAYS mutually exclusive conditions.
6795 */
6796 void
6797 vm_page_queues_remove(vm_page_t mem)
6798 {
6799 boolean_t was_pageable;
6800
6801 VM_PAGE_QUEUES_ASSERT(mem, 1);
6802 assert(!mem->pageout_queue);
6803 /*
6804 * if (mem->pageout_queue)
6805 * NOTE: vm_page_queues_remove does not deal with removing pages from the pageout queue...
6806 * the caller is responsible for determining if the page is on that queue, and if so, must
6807 * either first remove it (it needs both the page queues lock and the object lock to do
6808 * this via vm_pageout_steal_laundry), or avoid the call to vm_page_queues_remove
6809 */
6810 if (mem->local) {
6811 struct vpl *lq;
6812 assert(mem->object != kernel_object);
6813 assert(mem->object != compressor_object);
6814 assert(!mem->inactive && !mem->speculative);
6815 assert(!mem->active && !mem->throttled);
6816 assert(!mem->clean_queue);
6817 assert(!mem->fictitious);
6818 lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;
6819 VPL_LOCK(&lq->vpl_lock);
6820 queue_remove(&lq->vpl_queue,
6821 mem, vm_page_t, pageq);
6822 mem->local = FALSE;
6823 mem->local_id = 0;
6824 lq->vpl_count--;
6825 if (mem->object->internal) {
6826 lq->vpl_internal_count--;
6827 } else {
6828 lq->vpl_external_count--;
6829 }
6830 VPL_UNLOCK(&lq->vpl_lock);
6831 was_pageable = FALSE;
6832 }
6833
6834 else if (mem->active) {
6835 assert(mem->object != kernel_object);
6836 assert(mem->object != compressor_object);
6837 assert(!mem->inactive && !mem->speculative);
6838 assert(!mem->clean_queue);
6839 assert(!mem->throttled);
6840 assert(!mem->fictitious);
6841 queue_remove(&vm_page_queue_active,
6842 mem, vm_page_t, pageq);
6843 mem->active = FALSE;
6844 vm_page_active_count--;
6845 was_pageable = TRUE;
6846 }
6847
6848 else if (mem->inactive) {
6849 assert(mem->object != kernel_object);
6850 assert(mem->object != compressor_object);
6851 assert(!mem->active && !mem->speculative);
6852 assert(!mem->throttled);
6853 assert(!mem->fictitious);
6854 vm_page_inactive_count--;
6855 if (mem->clean_queue) {
6856 queue_remove(&vm_page_queue_cleaned,
6857 mem, vm_page_t, pageq);
6858 mem->clean_queue = FALSE;
6859 vm_page_cleaned_count--;
6860 } else {
6861 if (mem->object->internal) {
6862 queue_remove(&vm_page_queue_anonymous,
6863 mem, vm_page_t, pageq);
6864 vm_page_anonymous_count--;
6865 } else {
6866 queue_remove(&vm_page_queue_inactive,
6867 mem, vm_page_t, pageq);
6868 }
6869 vm_purgeable_q_advance_all();
6870 }
6871 mem->inactive = FALSE;
6872 was_pageable = TRUE;
6873 }
6874
6875 else if (mem->throttled) {
6876 assert(mem->object != compressor_object);
6877 assert(!mem->active && !mem->inactive);
6878 assert(!mem->speculative);
6879 assert(!mem->fictitious);
6880 queue_remove(&vm_page_queue_throttled,
6881 mem, vm_page_t, pageq);
6882 mem->throttled = FALSE;
6883 vm_page_throttled_count--;
6884 was_pageable = FALSE;
6885 }
6886
6887 else if (mem->speculative) {
6888 assert(mem->object != compressor_object);
6889 assert(!mem->active && !mem->inactive);
6890 assert(!mem->throttled);
6891 assert(!mem->fictitious);
6892 remque(&mem->pageq);
6893 mem->speculative = FALSE;
6894 vm_page_speculative_count--;
6895 was_pageable = TRUE;
6896 }
6897
6898 else if (mem->pageq.next || mem->pageq.prev) {
6899 was_pageable = FALSE;
6900 panic("vm_page_queues_remove: unmarked page on Q");
6901 } else {
6902 was_pageable = FALSE;
6903 }
6904
6905 mem->pageq.next = NULL;
6906 mem->pageq.prev = NULL;
6907 VM_PAGE_QUEUES_ASSERT(mem, 0);
6908 if (was_pageable) {
6909 if (mem->object->internal) {
6910 vm_page_pageable_internal_count--;
6911 } else {
6912 vm_page_pageable_external_count--;
6913 }
6914 }
6915 }
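
/*
 * Caller sketch (illustrative only): per the block comment above, the
 * global page-queues lock must already be held and any pageout-queue
 * removal must have been handled first; the local-queue lock, if needed,
 * is taken internally.
 *
 *	vm_page_lock_queues();
 *	assert(!mem->pageout_queue);
 *	vm_page_queues_remove(mem);
 *	vm_page_unlock_queues();
 */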
6916
6917 void
6918 vm_page_remove_internal(vm_page_t page)
6919 {
6920 vm_object_t __object = page->object;
6921 if (page == __object->memq_hint) {
6922 vm_page_t __new_hint;
6923 queue_entry_t __qe;
6924 __qe = queue_next(&page->listq);
6925 if (queue_end(&__object->memq, __qe)) {
6926 __qe = queue_prev(&page->listq);
6927 if (queue_end(&__object->memq, __qe)) {
6928 __qe = NULL;
6929 }
6930 }
6931 __new_hint = (vm_page_t) __qe;
6932 __object->memq_hint = __new_hint;
6933 }
6934 queue_remove(&__object->memq, page, vm_page_t, listq);
6935 }
6936
6937 void
6938 vm_page_enqueue_inactive(vm_page_t mem, boolean_t first)
6939 {
6940 VM_PAGE_QUEUES_ASSERT(mem, 0);
6941 assert(!mem->fictitious);
6942 assert(!mem->laundry);
6943 assert(!mem->pageout_queue);
6944 vm_page_check_pageable_safe(mem);
6945 if (mem->object->internal) {
6946 if (first == TRUE)
6947 queue_enter_first(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
6948 else
6949 queue_enter(&vm_page_queue_anonymous, mem, vm_page_t, pageq);
6950 vm_page_anonymous_count++;
6951 vm_page_pageable_internal_count++;
6952 } else {
6953 if (first == TRUE)
6954 queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq);
6955 else
6956 queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);
6957 vm_page_pageable_external_count++;
6958 }
6959 mem->inactive = TRUE;
6960 vm_page_inactive_count++;
6961 token_new_pagecount++;
6962 }
6963
6964 /*
6965 * Pages from special kernel objects shouldn't
6966 * be placed on pageable queues.
6967 */
6968 void
6969 vm_page_check_pageable_safe(vm_page_t page)
6970 {
6971 if (page->object == kernel_object) {
6972 panic("vm_page_check_pageable_safe: trying to add page " \
6973 "from kernel object (%p) to pageable queue", kernel_object);
6974 }
6975
6976 if (page->object == compressor_object) {
6977 panic("vm_page_check_pageable_safe: trying to add page " \
6978 "from compressor object (%p) to pageable queue", compressor_object);
6979 }
6980
6981 if (page->object == vm_submap_object) {
6982 panic("vm_page_check_pageable_safe: trying to add page " \
6983 "from submap object (%p) to pageable queue", vm_submap_object);
6984 }
6985 }
6986
6987 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
6988 * wired page diagnose
6989 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6990
6991 #include <libkern/OSKextLibPrivate.h>
6992
6993 vm_allocation_site_t *
6994 vm_allocation_sites[VM_KERN_MEMORY_COUNT];
6995
6996 vm_tag_t
6997 vm_tag_bt(void)
6998 {
6999 uintptr_t* frameptr;
7000 uintptr_t* frameptr_next;
7001 uintptr_t retaddr;
7002 uintptr_t kstackb, kstackt;
7003 const vm_allocation_site_t * site;
7004 thread_t cthread;
7005
7006 cthread = current_thread();
7007 if (__improbable(cthread == NULL)) return VM_KERN_MEMORY_OSFMK;
7008
7009 kstackb = cthread->kernel_stack;
7010 kstackt = kstackb + kernel_stack_size;
7011
7012 /* Load stack frame pointer (EBP on x86) into frameptr */
7013 frameptr = __builtin_frame_address(0);
7014 site = NULL;
7015 while (frameptr != NULL)
7016 {
7017 /* Verify thread stack bounds */
7018 if (((uintptr_t)(frameptr + 2) > kstackt) || ((uintptr_t)frameptr < kstackb)) break;
7019
7020 /* Next frame pointer is pointed to by the previous one */
7021 frameptr_next = (uintptr_t*) *frameptr;
7022
7023 /* Pull return address from one spot above the frame pointer */
7024 retaddr = *(frameptr + 1);
7025
7026 if ((retaddr < vm_kernel_stext) || (retaddr > vm_kernel_top))
7027 {
7028 site = OSKextGetAllocationSiteForCaller(retaddr);
7029 break;
7030 }
7031
7032 frameptr = frameptr_next;
7033 }
7034 return (site ? site->tag : VM_KERN_MEMORY_NONE);
7035 }
7036
7037 static uint64_t free_tag_bits[256/64];
7038
7039 void
7040 vm_tag_alloc_locked(vm_allocation_site_t * site)
7041 {
7042 vm_tag_t tag;
7043 uint64_t avail;
7044 uint64_t idx;
7045
7046 if (site->tag) return;
7047
7048 idx = 0;
7049 while (TRUE)
7050 {
7051 avail = free_tag_bits[idx];
7052 if (avail)
7053 {
7054 tag = __builtin_clzll(avail);
7055 avail &= ~(1ULL << (63 - tag));
7056 free_tag_bits[idx] = avail;
7057 tag += (idx << 6);
7058 break;
7059 }
7060 idx++;
7061 if (idx >= (sizeof(free_tag_bits) / sizeof(free_tag_bits[0])))
7062 {
7063 tag = VM_KERN_MEMORY_ANY;
7064 break;
7065 }
7066 }
7067 site->tag = tag;
7068 if (VM_KERN_MEMORY_ANY != tag)
7069 {
7070 assert(!vm_allocation_sites[tag]);
7071 vm_allocation_sites[tag] = site;
7072 }
7073 }
7074
7075 static void
7076 vm_tag_free_locked(vm_tag_t tag)
7077 {
7078 uint64_t avail;
7079 uint32_t idx;
7080 uint64_t bit;
7081
7082 if (VM_KERN_MEMORY_ANY == tag) return;
7083
7084 idx = (tag >> 6);
7085 avail = free_tag_bits[idx];
7086 tag &= 63;
7087 bit = (1ULL << (63 - tag));
7088 assert(!(avail & bit));
7089 free_tag_bits[idx] = (avail | bit);
7090 }
7091
7092 static void
7093 vm_tag_init(void)
7094 {
7095 vm_tag_t tag;
7096 for (tag = VM_KERN_MEMORY_FIRST_DYNAMIC; tag < VM_KERN_MEMORY_ANY; tag++)
7097 {
7098 vm_tag_free_locked(tag);
7099 }
7100 }
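
/*
 * Bitmap encoding example (hypothetical tag value): tag t occupies bit
 * (63 - (t & 63)) of free_tag_bits[t >> 6], so freeing tag 70 sets bit 57
 * of free_tag_bits[1].  vm_tag_alloc_locked() recovers it with
 * __builtin_clzll(): the leading-zero count of a word whose highest set
 * bit is bit 57 is 6, and 6 + (1 << 6) == 70.
 */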
7101
7102 vm_tag_t
7103 vm_tag_alloc(vm_allocation_site_t * site)
7104 {
7105 vm_tag_t tag;
7106
7107 if (VM_TAG_BT & site->flags)
7108 {
7109 tag = vm_tag_bt();
7110 if (VM_KERN_MEMORY_NONE != tag) return (tag);
7111 }
7112
7113 if (!site->tag)
7114 {
7115 lck_spin_lock(&vm_allocation_sites_lock);
7116 vm_tag_alloc_locked(site);
7117 lck_spin_unlock(&vm_allocation_sites_lock);
7118 }
7119
7120 return (site->tag);
7121 }
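
/*
 * Usage sketch (illustrative; vm_allocation_site_t has more fields than
 * shown and real sites are generated by the kernel's allocation-tagging
 * macros rather than hand-rolled statics, so "example_site" is purely
 * hypothetical):
 *
 *	static vm_allocation_site_t example_site = { .flags = VM_TAG_BT };
 *	...
 *	vm_tag_t tag = vm_tag_alloc(&example_site);
 *
 * With VM_TAG_BT set, the tag comes from walking the kernel stack via
 * vm_tag_bt(); otherwise (or when the backtrace can't be attributed) a
 * dynamic tag is carved out of free_tag_bits and the site is recorded in
 * vm_allocation_sites[].
 */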
7122
7123 static void
7124 vm_page_count_object(mach_memory_info_t * sites, unsigned int __unused num_sites, vm_object_t object)
7125 {
7126 if (!object->wired_page_count) return;
7127 if (object != kernel_object)
7128 {
7129 assert(object->wire_tag < num_sites);
7130 sites[object->wire_tag].size += ptoa_64(object->wired_page_count);
7131 }
7132 }
7133
7134 typedef void (*vm_page_iterate_proc)(mach_memory_info_t * sites,
7135 unsigned int num_sites, vm_object_t object);
7136
7137 static void
7138 vm_page_iterate_purgeable_objects(mach_memory_info_t * sites, unsigned int num_sites,
7139 vm_page_iterate_proc proc, purgeable_q_t queue,
7140 int group)
7141 {
7142 vm_object_t object;
7143
7144 for (object = (vm_object_t) queue_first(&queue->objq[group]);
7145 !queue_end(&queue->objq[group], (queue_entry_t) object);
7146 object = (vm_object_t) queue_next(&object->objq))
7147 {
7148 proc(sites, num_sites, object);
7149 }
7150 }
7151
7152 static void
7153 vm_page_iterate_objects(mach_memory_info_t * sites, unsigned int num_sites,
7154 vm_page_iterate_proc proc)
7155 {
7156 purgeable_q_t volatile_q;
7157 queue_head_t * nonvolatile_q;
7158 vm_object_t object;
7159 int group;
7160
7161 lck_spin_lock(&vm_objects_wired_lock);
7162 queue_iterate(&vm_objects_wired,
7163 object,
7164 vm_object_t,
7165 objq)
7166 {
7167 proc(sites, num_sites, object);
7168 }
7169 lck_spin_unlock(&vm_objects_wired_lock);
7170
7171 lck_mtx_lock(&vm_purgeable_queue_lock);
7172 nonvolatile_q = &purgeable_nonvolatile_queue;
7173 for (object = (vm_object_t) queue_first(nonvolatile_q);
7174 !queue_end(nonvolatile_q, (queue_entry_t) object);
7175 object = (vm_object_t) queue_next(&object->objq))
7176 {
7177 proc(sites, num_sites, object);
7178 }
7179
7180 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
7181 vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, 0);
7182
7183 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
7184 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
7185 {
7186 vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
7187 }
7188
7189 volatile_q = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
7190 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
7191 {
7192 vm_page_iterate_purgeable_objects(sites, num_sites, proc, volatile_q, group);
7193 }
7194 lck_mtx_unlock(&vm_purgeable_queue_lock);
7195 }
7196
7197 static uint64_t
7198 process_account(mach_memory_info_t * sites, unsigned int __unused num_sites)
7199 {
7200 uint64_t found;
7201 unsigned int idx;
7202 vm_allocation_site_t * site;
7203
7204 assert(num_sites >= VM_KERN_MEMORY_COUNT);
7205 found = 0;
7206 for (idx = 0; idx < VM_KERN_MEMORY_COUNT; idx++)
7207 {
7208 found += sites[idx].size;
7209 if (idx < VM_KERN_MEMORY_FIRST_DYNAMIC)
7210 {
7211 sites[idx].site = idx;
7212 sites[idx].flags |= VM_KERN_SITE_TAG;
7213 if (VM_KERN_MEMORY_ZONE == idx) sites[idx].flags |= VM_KERN_SITE_HIDE;
7214 else sites[idx].flags |= VM_KERN_SITE_WIRED;
7215 continue;
7216 }
7217 lck_spin_lock(&vm_allocation_sites_lock);
7218 if ((site = vm_allocation_sites[idx]))
7219 {
7220 if (sites[idx].size)
7221 {
7222 sites[idx].flags |= VM_KERN_SITE_WIRED;
7223 if (VM_TAG_KMOD == (VM_KERN_SITE_TYPE & site->flags))
7224 {
7225 sites[idx].site = OSKextGetKmodIDForSite(site);
7226 sites[idx].flags |= VM_KERN_SITE_KMOD;
7227 }
7228 else
7229 {
7230 sites[idx].site = VM_KERNEL_UNSLIDE(site);
7231 sites[idx].flags |= VM_KERN_SITE_KERNEL;
7232 }
7233 site = NULL;
7234 }
7235 else
7236 {
7237 vm_tag_free_locked(site->tag);
7238 site->tag = VM_KERN_MEMORY_NONE;
7239 vm_allocation_sites[idx] = NULL;
7240 if (!(VM_TAG_UNLOAD & site->flags)) site = NULL;
7241 }
7242 }
7243 lck_spin_unlock(&vm_allocation_sites_lock);
7244 if (site) OSKextFreeSite(site);
7245 }
7246 return (found);
7247 }
7248
7249 kern_return_t
7250 vm_page_diagnose(mach_memory_info_t * sites, unsigned int num_sites)
7251 {
7252 enum { kMaxKernelDepth = 1 };
7253 vm_map_t maps [kMaxKernelDepth];
7254 vm_map_entry_t entries[kMaxKernelDepth];
7255 vm_map_t map;
7256 vm_map_entry_t entry;
7257 vm_object_offset_t offset;
7258 vm_page_t page;
7259 int stackIdx, count;
7260 uint64_t wired_size;
7261 uint64_t wired_managed_size;
7262 uint64_t wired_reserved_size;
7263 mach_memory_info_t * counts;
7264
7265 bzero(sites, num_sites * sizeof(mach_memory_info_t));
7266
7267 vm_page_iterate_objects(sites, num_sites, &vm_page_count_object);
7268
7269 wired_size = ptoa_64(vm_page_wire_count + vm_lopage_free_count + vm_page_throttled_count);
7270 wired_reserved_size = ptoa_64(vm_page_wire_count_initial - vm_page_stolen_count + vm_page_throttled_count);
7271 wired_managed_size = ptoa_64(vm_page_wire_count - vm_page_wire_count_initial);
7272
7273 assert(num_sites >= (VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT));
7274 counts = &sites[VM_KERN_MEMORY_COUNT];
7275
7276 #define SET_COUNT(xcount, xsize, xflags) \
7277 counts[xcount].site = (xcount); \
7278 counts[xcount].size = (xsize); \
7279 counts[xcount].flags = VM_KERN_SITE_COUNTER | xflags;
7280
7281 SET_COUNT(VM_KERN_COUNT_MANAGED, ptoa_64(vm_page_pages), 0);
7282 SET_COUNT(VM_KERN_COUNT_WIRED, wired_size, 0);
7283 SET_COUNT(VM_KERN_COUNT_WIRED_MANAGED, wired_managed_size, 0);
7284 SET_COUNT(VM_KERN_COUNT_RESERVED, wired_reserved_size, VM_KERN_SITE_WIRED);
7285 SET_COUNT(VM_KERN_COUNT_STOLEN, ptoa_64(vm_page_stolen_count), VM_KERN_SITE_WIRED);
7286 SET_COUNT(VM_KERN_COUNT_LOPAGE, ptoa_64(vm_lopage_free_count), VM_KERN_SITE_WIRED);
7287
7288 #define SET_MAP(xcount, xsize, xfree, xlargest) \
7289 counts[xcount].site = (xcount); \
7290 counts[xcount].size = (xsize); \
7291 counts[xcount].free = (xfree); \
7292 counts[xcount].largest = (xlargest); \
7293 counts[xcount].flags = VM_KERN_SITE_COUNTER;
7294
7295 vm_map_size_t map_size, map_free, map_largest;
7296
7297 vm_map_sizes(kernel_map, &map_size, &map_free, &map_largest);
7298 SET_MAP(VM_KERN_COUNT_MAP_KERNEL, map_size, map_free, map_largest);
7299
7300 vm_map_sizes(zone_map, &map_size, &map_free, &map_largest);
7301 SET_MAP(VM_KERN_COUNT_MAP_ZONE, map_size, map_free, map_largest);
7302
7303 vm_map_sizes(kalloc_map, &map_size, &map_free, &map_largest);
7304 SET_MAP(VM_KERN_COUNT_MAP_KALLOC, map_size, map_free, map_largest);
7305
7306 map = kernel_map;
7307 stackIdx = 0;
7308 while (map)
7309 {
7310 vm_map_lock(map);
7311 for (entry = map->hdr.links.next; map; entry = entry->links.next)
7312 {
7313 if (entry->is_sub_map)
7314 {
7315 assert(stackIdx < kMaxKernelDepth);
7316 maps[stackIdx] = map;
7317 entries[stackIdx] = entry;
7318 stackIdx++;
7319 map = VME_SUBMAP(entry);
7320 entry = NULL;
7321 break;
7322 }
7323 if (VME_OBJECT(entry) == kernel_object)
7324 {
7325 count = 0;
7326 vm_object_lock(VME_OBJECT(entry));
7327 for (offset = entry->links.start; offset < entry->links.end; offset += page_size)
7328 {
7329 page = vm_page_lookup(VME_OBJECT(entry), offset);
7330 if (page && VM_PAGE_WIRED(page)) count++;
7331 }
7332 vm_object_unlock(VME_OBJECT(entry));
7333
7334 if (count)
7335 {
7336 assert(VME_ALIAS(entry) < num_sites);
7337 sites[VME_ALIAS(entry)].size += ptoa_64(count);
7338 }
7339 }
7340 if (entry == vm_map_last_entry(map))
7341 {
7342 vm_map_unlock(map);
7343 if (!stackIdx) map = NULL;
7344 else
7345 {
7346 --stackIdx;
7347 map = maps[stackIdx];
7348 entry = entries[stackIdx];
7349 }
7350 }
7351 }
7352 }
7353
7354 process_account(sites, num_sites);
7355
7356 return (KERN_SUCCESS);
7357 }
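
/*
 * Caller sketch (illustrative; the buffer handling is an assumption, not a
 * copy of the real call site): the sites array must hold at least
 * VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT entries, since the counter
 * records are written starting at index VM_KERN_MEMORY_COUNT, and it is
 * bzero'd by vm_page_diagnose() itself.
 *
 *	unsigned int num = VM_KERN_MEMORY_COUNT + VM_KERN_COUNTER_COUNT;
 *	mach_memory_info_t *info = kalloc(num * sizeof(mach_memory_info_t));
 *
 *	if (info != NULL) {
 *		vm_page_diagnose(info, num);
 *		... report per-tag wired sizes ...
 *		kfree(info, num * sizeof(mach_memory_info_t));
 *	}
 */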