1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <kern/ledger.h>
80 #include <vm/pmap.h>
81 #include <vm/vm_init.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_page.h>
84 #include <vm/vm_pageout.h>
85 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
86 #include <kern/misc_protos.h>
87 #include <zone_debug.h>
88 #include <vm/cpm.h>
89 #include <pexpert/pexpert.h>
90
91 #include <vm/vm_protos.h>
92 #include <vm/memory_object.h>
93 #include <vm/vm_purgeable_internal.h>
94 #include <vm/vm_compressor.h>
95
96 #if CONFIG_PHANTOM_CACHE
97 #include <vm/vm_phantom_cache.h>
98 #endif
99
100 #include <IOKit/IOHibernatePrivate.h>
101
102 #include <sys/kdebug.h>
103
104 boolean_t hibernate_cleaning_in_progress = FALSE;
105 boolean_t vm_page_free_verify = TRUE;
106
107 uint32_t vm_lopage_free_count = 0;
108 uint32_t vm_lopage_free_limit = 0;
109 uint32_t vm_lopage_lowater = 0;
110 boolean_t vm_lopage_refill = FALSE;
111 boolean_t vm_lopage_needed = FALSE;
112
113 lck_mtx_ext_t vm_page_queue_lock_ext;
114 lck_mtx_ext_t vm_page_queue_free_lock_ext;
115 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
116
117 int speculative_age_index = 0;
118 int speculative_steal_index = 0;
119 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
120
121
122 __private_extern__ void vm_page_init_lck_grp(void);
123
124 static void vm_page_free_prepare(vm_page_t page);
125 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
126
127
128
129
130 /*
131 * Associated with each page of user-allocatable memory is a
132 * page structure.
133 */
134
135 /*
136 * These variables record the values returned by vm_page_bootstrap,
137 * for debugging purposes. The implementation of pmap_steal_memory
138 * and pmap_startup here also uses them internally.
139 */
140
141 vm_offset_t virtual_space_start;
142 vm_offset_t virtual_space_end;
143 uint32_t vm_page_pages;
144
145 /*
146 * The vm_page_lookup() routine, which provides for fast
147 * (virtual memory object, offset) to page lookup, employs
148 * the following hash table. The vm_page_{insert,remove}
149 * routines install and remove associations in the table.
150 * [This table is often called the virtual-to-physical,
151 * or VP, table.]
152 */
153 typedef struct {
154 vm_page_packed_t page_list;
155 #if MACH_PAGE_HASH_STATS
156 int cur_count; /* current count */
157 int hi_count; /* high water mark */
158 #endif /* MACH_PAGE_HASH_STATS */
159 } vm_page_bucket_t;
160
161
162 #define BUCKETS_PER_LOCK 16
163
164 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
165 unsigned int vm_page_bucket_count = 0; /* How big is array? */
166 unsigned int vm_page_hash_mask; /* Mask for hash function */
167 unsigned int vm_page_hash_shift; /* Shift for hash function */
168 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
169 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
170
171 lck_spin_t *vm_page_bucket_locks;
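/*
 * Illustrative note (not part of the original source): hash buckets share
 * spin locks in groups of BUCKETS_PER_LOCK, so the lock protecting a bucket
 * is found by integer division of the hash index. For example, with
 * BUCKETS_PER_LOCK == 16, buckets 0..15 map to vm_page_bucket_locks[0],
 * buckets 16..31 to vm_page_bucket_locks[1], and a hypothetical hash_id of
 * 37 maps to lock index 37 / 16 == 2.
 */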
172
173 #if VM_PAGE_BUCKETS_CHECK
174 boolean_t vm_page_buckets_check_ready = FALSE;
175 #if VM_PAGE_FAKE_BUCKETS
176 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
177 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
178 #endif /* VM_PAGE_FAKE_BUCKETS */
179 #endif /* VM_PAGE_BUCKETS_CHECK */
180
181 #if MACH_PAGE_HASH_STATS
182 /* This routine is for debugging only. It is intended to be called by
183 * hand by a developer using a kernel debugger. It prints
184 * vm_page_hash table statistics to the kernel debug console.
185 */
186 void
187 hash_debug(void)
188 {
189 int i;
190 int numbuckets = 0;
191 int highsum = 0;
192 int maxdepth = 0;
193
194 for (i = 0; i < vm_page_bucket_count; i++) {
195 if (vm_page_buckets[i].hi_count) {
196 numbuckets++;
197 highsum += vm_page_buckets[i].hi_count;
198 if (vm_page_buckets[i].hi_count > maxdepth)
199 maxdepth = vm_page_buckets[i].hi_count;
200 }
201 }
202 printf("Total number of buckets: %d\n", vm_page_bucket_count);
203 printf("Number used buckets: %d = %d%%\n",
204 numbuckets, 100*numbuckets/vm_page_bucket_count);
205 printf("Number unused buckets: %d = %d%%\n",
206 vm_page_bucket_count - numbuckets,
207 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
208 printf("Sum of bucket max depth: %d\n", highsum);
209 printf("Average bucket depth: %d.%2d\n",
210 highsum/vm_page_bucket_count,
211 highsum%vm_page_bucket_count);
212 printf("Maximum bucket depth: %d\n", maxdepth);
213 }
214 #endif /* MACH_PAGE_HASH_STATS */
215
216 /*
217 * The virtual page size is currently implemented as a runtime
218 * variable, but is constant once initialized using vm_set_page_size.
219 * This initialization must be done in the machine-dependent
220 * bootstrap sequence, before calling other machine-independent
221 * initializations.
222 *
223 * All references to the virtual page size outside this
224 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
225 * constants.
226 */
227 vm_size_t page_size = PAGE_SIZE;
228 vm_size_t page_mask = PAGE_MASK;
229 int page_shift = PAGE_SHIFT;
230
231 /*
232 * Resident page structures are initialized from
233 * a template (see vm_page_alloc).
234 *
235 * When adding a new field to the virtual memory
236 * object structure, be sure to add initialization
237 * (see vm_page_bootstrap).
238 */
239 struct vm_page vm_page_template;
240
241 vm_page_t vm_pages = VM_PAGE_NULL;
242 unsigned int vm_pages_count = 0;
243 ppnum_t vm_page_lowest = 0;
244
245 /*
246 * Resident pages that represent real memory
247 * are allocated from a set of free lists,
248 * one per color.
249 */
250 unsigned int vm_colors;
251 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
252 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
253 unsigned int vm_free_magazine_refill_limit = 0;
254 queue_head_t vm_page_queue_free[MAX_COLORS];
255 unsigned int vm_page_free_wanted;
256 unsigned int vm_page_free_wanted_privileged;
257 unsigned int vm_page_free_count;
258 unsigned int vm_page_fictitious_count;
259
260 /*
261 * Occasionally, the virtual memory system uses
262 * resident page structures that do not refer to
263 * real pages, for example to leave a page with
264 * important state information in the VP table.
265 *
266 * These page structures are allocated the way
267 * most other kernel structures are.
268 */
269 zone_t vm_page_zone;
270 vm_locks_array_t vm_page_locks;
271 decl_lck_mtx_data(,vm_page_alloc_lock)
272 lck_mtx_ext_t vm_page_alloc_lock_ext;
273
274 unsigned int io_throttle_zero_fill;
275
276 unsigned int vm_page_local_q_count = 0;
277 unsigned int vm_page_local_q_soft_limit = 250;
278 unsigned int vm_page_local_q_hard_limit = 500;
279 struct vplq *vm_page_local_q = NULL;
280
281 /* N.B. Guard and fictitious pages must not
282 * be assigned a zero phys_page value.
283 */
284 /*
285 * Fictitious pages don't have a physical address,
286 * but we must initialize phys_page to something.
287 * For debugging, this should be a strange value
288 * that the pmap module can recognize in assertions.
289 */
290 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
291
292 /*
293 * Guard pages are not accessible so they don't
294 * need a physical address, but we need to enter
295 * one in the pmap.
296 * Let's make it recognizable and make sure that
297 * we don't use a real physical page with that
298 * physical address.
299 */
300 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
301
302 /*
303 * Resident page structures are also chained on
304 * queues that are used by the page replacement
305 * system (pageout daemon). These queues are
306 * defined here, but are shared by the pageout
307 * module. The inactive queue is broken into
308 * file backed and anonymous for convenience as the
309 * pageout daemon often assigns a higher
310 * importance to anonymous pages (less likely to pick)
311 */
312 queue_head_t vm_page_queue_active;
313 queue_head_t vm_page_queue_inactive;
314 queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
315 queue_head_t vm_page_queue_throttled;
316
317 unsigned int vm_page_active_count;
318 unsigned int vm_page_inactive_count;
319 unsigned int vm_page_anonymous_count;
320 unsigned int vm_page_throttled_count;
321 unsigned int vm_page_speculative_count;
322 unsigned int vm_page_wire_count;
323 unsigned int vm_page_wire_count_initial;
324 unsigned int vm_page_gobble_count = 0;
325
326 #define VM_PAGE_WIRE_COUNT_WARNING 0
327 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
328
329 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
330 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
331 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
332
333 unsigned int vm_page_xpmapped_external_count = 0;
334 unsigned int vm_page_external_count = 0;
335 unsigned int vm_page_internal_count = 0;
336 unsigned int vm_page_pageable_external_count = 0;
337 unsigned int vm_page_pageable_internal_count = 0;
338
339 #if DEVELOPMENT || DEBUG
340 unsigned int vm_page_speculative_recreated = 0;
341 unsigned int vm_page_speculative_created = 0;
342 unsigned int vm_page_speculative_used = 0;
343 #endif
344
345 queue_head_t vm_page_queue_cleaned;
346
347 unsigned int vm_page_cleaned_count = 0;
348 unsigned int vm_pageout_enqueued_cleaned = 0;
349
350 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
351 ppnum_t max_valid_low_ppnum = 0xffffffff;
352
353
354 /*
355 * Several page replacement parameters are also
356 * shared with this module, so that page allocation
357 * (done here in vm_page_alloc) can trigger the
358 * pageout daemon.
359 */
360 unsigned int vm_page_free_target = 0;
361 unsigned int vm_page_free_min = 0;
362 unsigned int vm_page_throttle_limit = 0;
363 unsigned int vm_page_inactive_target = 0;
364 unsigned int vm_page_anonymous_min = 0;
365 unsigned int vm_page_inactive_min = 0;
366 unsigned int vm_page_free_reserved = 0;
367 unsigned int vm_page_throttle_count = 0;
368
369
370 /*
371 * The VM system has a couple of heuristics for deciding
372 * that pages are "uninteresting" and should be placed
373 * on the inactive queue as likely candidates for replacement.
374 * These variables let the heuristics be controlled at run-time
375 * to make experimentation easier.
376 */
377
378 boolean_t vm_page_deactivate_hint = TRUE;
379
380 struct vm_page_stats_reusable vm_page_stats_reusable;
381
382 /*
383 * vm_set_page_size:
384 *
385 * Sets the page size, perhaps based upon the memory
386 * size. Must be called before any use of page-size
387 * dependent functions.
388 *
389 * Sets page_shift and page_mask from page_size.
390 */
391 void
392 vm_set_page_size(void)
393 {
394 page_size = PAGE_SIZE;
395 page_mask = PAGE_MASK;
396 page_shift = PAGE_SHIFT;
397
398 if ((page_mask & page_size) != 0)
399 panic("vm_set_page_size: page size not a power of two");
400
401 for (page_shift = 0; ; page_shift++)
402 if ((1U << page_shift) == page_size)
403 break;
404 }
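/*
 * Worked example (not part of the original source): with the common 4 KB
 * page size, page_size == 4096 and page_mask == 0xFFF, so the power-of-two
 * check passes (0xFFF & 0x1000 == 0) and the loop above stops at
 * page_shift == 12, since (1U << 12) == 4096.
 */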
405
406 #define COLOR_GROUPS_TO_STEAL 4
407
408
409 /* Called once during startup, once the cache geometry is known.
410 */
411 static void
412 vm_page_set_colors( void )
413 {
414 unsigned int n, override;
415
416 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
417 n = override;
418 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
419 n = vm_cache_geometry_colors;
420 else n = DEFAULT_COLORS; /* use default if all else fails */
421
422 if ( n == 0 )
423 n = 1;
424 if ( n > MAX_COLORS )
425 n = MAX_COLORS;
426
427 /* the count must be a power of 2 */
428 if ( ( n & (n - 1)) != 0 )
429 panic("vm_page_set_colors");
430
431 vm_colors = n;
432 vm_color_mask = n - 1;
433
434 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
435 }
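/*
 * Worked example (hypothetical cache geometry, not part of the original
 * source): if the platform code reported vm_cache_geometry_colors == 32,
 * the code above would set vm_colors == 32, vm_color_mask == 0x1f and,
 * with COLOR_GROUPS_TO_STEAL == 4, vm_free_magazine_refill_limit == 128.
 * Requiring a power-of-two color count lets a page's free-list color be
 * derived from its physical page number with a single mask operation.
 */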
436
437
438 lck_grp_t vm_page_lck_grp_free;
439 lck_grp_t vm_page_lck_grp_queue;
440 lck_grp_t vm_page_lck_grp_local;
441 lck_grp_t vm_page_lck_grp_purge;
442 lck_grp_t vm_page_lck_grp_alloc;
443 lck_grp_t vm_page_lck_grp_bucket;
444 lck_grp_attr_t vm_page_lck_grp_attr;
445 lck_attr_t vm_page_lck_attr;
446
447
448 __private_extern__ void
449 vm_page_init_lck_grp(void)
450 {
451 /*
452 * initialize the vm_page lock world
453 */
454 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
455 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
456 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
457 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
458 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
459 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
460 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
461 lck_attr_setdefault(&vm_page_lck_attr);
462 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
463
464 vm_compressor_init_locks();
465 }
466
467 void
468 vm_page_init_local_q()
469 {
470 unsigned int num_cpus;
471 unsigned int i;
472 struct vplq *t_local_q;
473
474 num_cpus = ml_get_max_cpus();
475
476 /*
477 * no point in this for a uni-processor system
478 */
479 if (num_cpus >= 2) {
480 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
481
482 for (i = 0; i < num_cpus; i++) {
483 struct vpl *lq;
484
485 lq = &t_local_q[i].vpl_un.vpl;
486 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
487 queue_init(&lq->vpl_queue);
488 lq->vpl_count = 0;
489 lq->vpl_internal_count = 0;
490 lq->vpl_external_count = 0;
491 }
492 vm_page_local_q_count = num_cpus;
493
494 vm_page_local_q = (struct vplq *)t_local_q;
495 }
496 }
497
498
499 /*
500 * vm_page_bootstrap:
501 *
502 * Initializes the resident memory module.
503 *
504 * Allocates memory for the page cells, and
505 * for the object/offset-to-page hash table headers.
506 * Each page cell is initialized and placed on the free list.
507 * Returns the range of available kernel virtual memory.
508 */
509
510 void
511 vm_page_bootstrap(
512 vm_offset_t *startp,
513 vm_offset_t *endp)
514 {
515 register vm_page_t m;
516 unsigned int i;
517 unsigned int log1;
518 unsigned int log2;
519 unsigned int size;
520
521 /*
522 * Initialize the vm_page template.
523 */
524
525 m = &vm_page_template;
526 bzero(m, sizeof (*m));
527
528 m->pageq.next = NULL;
529 m->pageq.prev = NULL;
530 m->listq.next = NULL;
531 m->listq.prev = NULL;
532 m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
533
534 m->object = VM_OBJECT_NULL; /* reset later */
535 m->offset = (vm_object_offset_t) -1; /* reset later */
536
537 m->wire_count = 0;
538 m->local = FALSE;
539 m->inactive = FALSE;
540 m->active = FALSE;
541 m->pageout_queue = FALSE;
542 m->speculative = FALSE;
543 m->laundry = FALSE;
544 m->free = FALSE;
545 m->reference = FALSE;
546 m->gobbled = FALSE;
547 m->private = FALSE;
548 m->throttled = FALSE;
549 m->__unused_pageq_bits = 0;
550
551 m->phys_page = 0; /* reset later */
552
553 m->busy = TRUE;
554 m->wanted = FALSE;
555 m->tabled = FALSE;
556 m->hashed = FALSE;
557 m->fictitious = FALSE;
558 m->pmapped = FALSE;
559 m->wpmapped = FALSE;
560 m->pageout = FALSE;
561 m->absent = FALSE;
562 m->error = FALSE;
563 m->dirty = FALSE;
564 m->cleaning = FALSE;
565 m->precious = FALSE;
566 m->clustered = FALSE;
567 m->overwriting = FALSE;
568 m->restart = FALSE;
569 m->unusual = FALSE;
570 m->encrypted = FALSE;
571 m->encrypted_cleaning = FALSE;
572 m->cs_validated = FALSE;
573 m->cs_tainted = FALSE;
574 m->cs_nx = FALSE;
575 m->no_cache = FALSE;
576 m->reusable = FALSE;
577 m->slid = FALSE;
578 m->xpmapped = FALSE;
579 m->compressor = FALSE;
580 m->written_by_kernel = FALSE;
581 m->__unused_object_bits = 0;
582
583 /*
584 * Initialize the page queues.
585 */
586 vm_page_init_lck_grp();
587
588 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
589 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
590 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
591
592 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
593 int group;
594
595 purgeable_queues[i].token_q_head = 0;
596 purgeable_queues[i].token_q_tail = 0;
597 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
598 queue_init(&purgeable_queues[i].objq[group]);
599
600 purgeable_queues[i].type = i;
601 purgeable_queues[i].new_pages = 0;
602 #if MACH_ASSERT
603 purgeable_queues[i].debug_count_tokens = 0;
604 purgeable_queues[i].debug_count_objects = 0;
605 #endif
606 };
607 purgeable_nonvolatile_count = 0;
608 queue_init(&purgeable_nonvolatile_queue);
609
610 for (i = 0; i < MAX_COLORS; i++ )
611 queue_init(&vm_page_queue_free[i]);
612
613 queue_init(&vm_lopage_queue_free);
614 queue_init(&vm_page_queue_active);
615 queue_init(&vm_page_queue_inactive);
616 queue_init(&vm_page_queue_cleaned);
617 queue_init(&vm_page_queue_throttled);
618 queue_init(&vm_page_queue_anonymous);
619
620 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
621 queue_init(&vm_page_queue_speculative[i].age_q);
622
623 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
624 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
625 }
626 vm_page_free_wanted = 0;
627 vm_page_free_wanted_privileged = 0;
628
629 vm_page_set_colors();
630
631
632 /*
633 * Steal memory for the map and zone subsystems.
634 */
635 kernel_debug_string("zone_steal_memory");
636 zone_steal_memory();
637 kernel_debug_string("vm_map_steal_memory");
638 vm_map_steal_memory();
639
640 /*
641 * Allocate (and initialize) the virtual-to-physical
642 * table hash buckets.
643 *
644 * The number of buckets should be a power of two to
645 * get a good hash function. The following computation
646 * chooses the first power of two that is greater
647 * than the number of physical pages in the system.
648 */
649
650 if (vm_page_bucket_count == 0) {
651 unsigned int npages = pmap_free_pages();
652
653 vm_page_bucket_count = 1;
654 while (vm_page_bucket_count < npages)
655 vm_page_bucket_count <<= 1;
656 }
657 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
658
659 vm_page_hash_mask = vm_page_bucket_count - 1;
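/*
 * Worked example (hypothetical page count, not part of the original
 * source): if pmap_free_pages() returned 1,000,000, the loop above would
 * stop at vm_page_bucket_count == 1,048,576 (2^20), giving
 * vm_page_hash_mask == 0xFFFFF and, with BUCKETS_PER_LOCK == 16,
 * vm_page_bucket_lock_count == 65,536.
 */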
660
661 /*
662 * Calculate object shift value for hashing algorithm:
663 * O = log2(sizeof(struct vm_object))
664 * B = log2(vm_page_bucket_count)
665 * hash shifts the object left by
666 * B/2 - O
667 */
668 size = vm_page_bucket_count;
669 for (log1 = 0; size > 1; log1++)
670 size /= 2;
671 size = sizeof(struct vm_object);
672 for (log2 = 0; size > 1; log2++)
673 size /= 2;
674 vm_page_hash_shift = log1/2 - log2 + 1;
675
676 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
677 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of quadroot of table size) */
678 vm_page_bucket_hash |= 1; /* Set bit and add 1 - must always be 1 to ensure a unique series */
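/*
 * Worked example (hypothetical sizes, not part of the original source):
 * continuing the 2^20 bucket example, log1 == 20; if sizeof(struct
 * vm_object) were 256 bytes, log2 == 8 and
 * vm_page_hash_shift == 20/2 - 8 + 1 == 3. The seed computed above would
 * then be (1 << 10) | (1 << 5) | 1 == 0x421.
 */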
679
680 if (vm_page_hash_mask & vm_page_bucket_count)
681 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
682
683 #if VM_PAGE_BUCKETS_CHECK
684 #if VM_PAGE_FAKE_BUCKETS
685 /*
686 * Allocate a decoy set of page buckets, to detect
687 * any stomping there.
688 */
689 vm_page_fake_buckets = (vm_page_bucket_t *)
690 pmap_steal_memory(vm_page_bucket_count *
691 sizeof(vm_page_bucket_t));
692 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
693 vm_page_fake_buckets_end =
694 vm_map_round_page((vm_page_fake_buckets_start +
695 (vm_page_bucket_count *
696 sizeof (vm_page_bucket_t))),
697 PAGE_MASK);
698 char *cp;
699 for (cp = (char *)vm_page_fake_buckets_start;
700 cp < (char *)vm_page_fake_buckets_end;
701 cp++) {
702 *cp = 0x5a;
703 }
704 #endif /* VM_PAGE_FAKE_BUCKETS */
705 #endif /* VM_PAGE_BUCKETS_CHECK */
706
707 kernel_debug_string("vm_page_buckets");
708 vm_page_buckets = (vm_page_bucket_t *)
709 pmap_steal_memory(vm_page_bucket_count *
710 sizeof(vm_page_bucket_t));
711
712 kernel_debug_string("vm_page_bucket_locks");
713 vm_page_bucket_locks = (lck_spin_t *)
714 pmap_steal_memory(vm_page_bucket_lock_count *
715 sizeof(lck_spin_t));
716
717 for (i = 0; i < vm_page_bucket_count; i++) {
718 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
719
720 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
721 #if MACH_PAGE_HASH_STATS
722 bucket->cur_count = 0;
723 bucket->hi_count = 0;
724 #endif /* MACH_PAGE_HASH_STATS */
725 }
726
727 for (i = 0; i < vm_page_bucket_lock_count; i++)
728 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
729
730 #if VM_PAGE_BUCKETS_CHECK
731 vm_page_buckets_check_ready = TRUE;
732 #endif /* VM_PAGE_BUCKETS_CHECK */
733
734 /*
735 * Machine-dependent code allocates the resident page table.
736 * It uses vm_page_init to initialize the page frames.
737 * The code also returns to us the virtual space available
738 * to the kernel. We don't trust the pmap module
739 * to get the alignment right.
740 */
741
742 kernel_debug_string("pmap_startup");
743 pmap_startup(&virtual_space_start, &virtual_space_end);
744 virtual_space_start = round_page(virtual_space_start);
745 virtual_space_end = trunc_page(virtual_space_end);
746
747 *startp = virtual_space_start;
748 *endp = virtual_space_end;
749
750 /*
751 * Compute the initial "wire" count.
752 * Up until now, the pages which have been set aside are not under
753 * the VM system's control, so although they aren't explicitly
754 * wired, they nonetheless can't be moved. At this moment,
755 * all VM managed pages are "free", courtesy of pmap_startup.
756 */
757 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
758 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
759 vm_page_wire_count_initial = vm_page_wire_count;
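/*
 * Worked example (hypothetical numbers, not part of the original source):
 * on a machine with 4 GB of memory and 4 KB pages,
 * atop_64(max_mem) == 1,048,576; if 1,000,000 of those pages were released
 * to the free list and none went to the lopage queue, the initial
 * vm_page_wire_count would be 48,576.
 */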
760
761 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
762 vm_page_free_count, vm_page_wire_count);
763
764 kernel_debug_string("vm_page_bootstrap complete");
765 simple_lock_init(&vm_paging_lock, 0);
766 }
767
768 #ifndef MACHINE_PAGES
769 /*
770 * We implement pmap_steal_memory and pmap_startup with the help
771 * of two simpler functions, pmap_virtual_space and pmap_next_page.
772 */
773
774 void *
775 pmap_steal_memory(
776 vm_size_t size)
777 {
778 vm_offset_t addr, vaddr;
779 ppnum_t phys_page;
780
781 /*
782 * We round the size up to a multiple of the pointer size.
783 */
784
785 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
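/*
 * Worked example (not part of the original source): on LP64,
 * sizeof(void *) == 8, so a request for 60 bytes is rounded up to
 * (60 + 7) & ~7 == 64, while a request that is already a multiple of 8
 * passes through unchanged.
 */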
786
787 /*
788 * If this is the first call to pmap_steal_memory,
789 * we have to initialize ourself.
790 */
791
792 if (virtual_space_start == virtual_space_end) {
793 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
794
795 /*
796 * The initial values must be aligned properly, and
797 * we don't trust the pmap module to do it right.
798 */
799
800 virtual_space_start = round_page(virtual_space_start);
801 virtual_space_end = trunc_page(virtual_space_end);
802 }
803
804 /*
805 * Allocate virtual memory for this request.
806 */
807
808 addr = virtual_space_start;
809 virtual_space_start += size;
810
811 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
812
813 /*
814 * Allocate and map physical pages to back new virtual pages.
815 */
816
817 for (vaddr = round_page(addr);
818 vaddr < addr + size;
819 vaddr += PAGE_SIZE) {
820
821 if (!pmap_next_page_hi(&phys_page))
822 panic("pmap_steal_memory");
823
824 /*
825 * XXX Logically, these mappings should be wired,
826 * but some pmap modules barf if they are.
827 */
828 #if defined(__LP64__)
829 pmap_pre_expand(kernel_pmap, vaddr);
830 #endif
831
832 pmap_enter(kernel_pmap, vaddr, phys_page,
833 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
834 VM_WIMG_USE_DEFAULT, FALSE);
835 /*
836 * Account for newly stolen memory
837 */
838 vm_page_wire_count++;
839
840 }
841
842 return (void *) addr;
843 }
844
845 void vm_page_release_startup(vm_page_t mem);
846 void
847 pmap_startup(
848 vm_offset_t *startp,
849 vm_offset_t *endp)
850 {
851 unsigned int i, npages, pages_initialized, fill, fillval;
852 ppnum_t phys_page;
853 addr64_t tmpaddr;
854
855
856 #if defined(__LP64__)
857 /*
858 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
859 */
860 assert(sizeof(struct vm_page) == 64);
861
862 /*
863 * make sure we are aligned on a 64 byte boundary
864 * for VM_PAGE_PACK_PTR (it clips off the low-order
865 * 6 bits of the pointer)
866 */
867 if (virtual_space_start != virtual_space_end)
868 virtual_space_start = round_page(virtual_space_start);
869 #endif
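/*
 * A generic sketch of the pointer-packing idea only -- the real
 * VM_PAGE_PACK_PTR/VM_PAGE_UNPACK_PTR definitions live in the VM headers
 * and may differ. Because every element is 64-byte aligned, the low 6 bits
 * of its offset from a known base are zero, so the offset can be stored
 * right-shifted in a narrower integer. The helpers below are hypothetical
 * and for illustration only.
 */
#if 0 /* illustrative only */
static uint32_t
example_pack_ptr(const void *base, const void *p)
{
	/* discard the six always-zero low bits of the offset */
	return (uint32_t)(((uintptr_t)p - (uintptr_t)base) >> 6);
}

static void *
example_unpack_ptr(const void *base, uint32_t packed)
{
	/* restore the full pointer by shifting the offset back */
	return (void *)((uintptr_t)base + ((uintptr_t)packed << 6));
}
#endif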
870
871 /*
872 * We calculate how many page frames we will have
873 * and then allocate the page structures in one chunk.
874 */
875
876 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
877 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
878 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* Figure how many page frames fit once each also gets a vm_page_t */
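/*
 * Worked example (hypothetical numbers, not part of the original source):
 * each managed page needs PAGE_SIZE bytes of backing memory plus one
 * 64-byte struct vm_page, hence the combined divisor above. With 1 GB of
 * memory left and 4 KB pages, npages == 0x40000000 / (4096 + 64) == 258,111.
 */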
879
880 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
881
882 /*
883 * Initialize the page frames.
884 */
885 kernel_debug_string("Initialize the page frames");
886 for (i = 0, pages_initialized = 0; i < npages; i++) {
887 if (!pmap_next_page(&phys_page))
888 break;
889 if (pages_initialized == 0 || phys_page < vm_page_lowest)
890 vm_page_lowest = phys_page;
891
892 vm_page_init(&vm_pages[i], phys_page, FALSE);
893 vm_page_pages++;
894 pages_initialized++;
895 }
896 vm_pages_count = pages_initialized;
897
898 #if defined(__LP64__)
899
900 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
901 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
902
903 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
904 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
905 #endif
906 kernel_debug_string("page fill/release");
907 /*
908 * Check if we want to initialize pages to a known value
909 */
910 fill = 0; /* Assume no fill */
911 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
912 #if DEBUG
913 /* This slows down booting the DEBUG kernel, particularly on
914 * large memory systems, but is worthwhile in deterministically
915 * trapping uninitialized memory usage.
916 */
917 if (fill == 0) {
918 fill = 1;
919 fillval = 0xDEB8F177;
920 }
921 #endif
922 if (fill)
923 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
924 // -debug code remove
925 if (2 == vm_himemory_mode) {
926 // free low -> high so high is preferred
927 for (i = 1; i <= pages_initialized; i++) {
928 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
929 vm_page_release_startup(&vm_pages[i - 1]);
930 }
931 }
932 else
933 // debug code remove-
934
935 /*
936 * Release pages in reverse order so that physical pages
937 * initially get allocated in ascending addresses. This keeps
938 * the devices (which must address physical memory) happy if
939 * they require several consecutive pages.
940 */
941 for (i = pages_initialized; i > 0; i--) {
942 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
943 vm_page_release_startup(&vm_pages[i - 1]);
944 }
945
946 VM_CHECK_MEMORYSTATUS;
947
948 #if 0
949 {
950 vm_page_t xx, xxo, xxl;
951 int i, j, k, l;
952
953 j = 0; /* (BRINGUP) */
954 xxl = 0;
955
956 for( i = 0; i < vm_colors; i++ ) {
957 queue_iterate(&vm_page_queue_free[i],
958 xx,
959 vm_page_t,
960 pageq) { /* BRINGUP */
961 j++; /* (BRINGUP) */
962 if(j > vm_page_free_count) { /* (BRINGUP) */
963 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
964 }
965
966 l = vm_page_free_count - j; /* (BRINGUP) */
967 k = 0; /* (BRINGUP) */
968
969 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
970
971 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
972 k++;
973 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
974 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
975 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
976 }
977 }
978
979 xxl = xx;
980 }
981 }
982
983 if(j != vm_page_free_count) { /* (BRINGUP) */
984 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
985 }
986 }
987 #endif
988
989
990 /*
991 * We have to re-align virtual_space_start,
992 * because pmap_steal_memory has been using it.
993 */
994
995 virtual_space_start = round_page(virtual_space_start);
996
997 *startp = virtual_space_start;
998 *endp = virtual_space_end;
999 }
1000 #endif /* MACHINE_PAGES */
1001
1002 /*
1003 * Routine: vm_page_module_init
1004 * Purpose:
1005 * Second initialization pass, to be done after
1006 * the basic VM system is ready.
1007 */
1008 void
1009 vm_page_module_init(void)
1010 {
1011 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
1012 0, PAGE_SIZE, "vm pages");
1013
1014 #if ZONE_DEBUG
1015 zone_debug_disable(vm_page_zone);
1016 #endif /* ZONE_DEBUG */
1017
1018 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1019 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1020 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1021 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1022 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1023 /*
1024 * Adjust zone statistics to account for the real pages allocated
1025 * in vm_page_create(). [Q: is this really what we want?]
1026 */
1027 vm_page_zone->count += vm_page_pages;
1028 vm_page_zone->sum_count += vm_page_pages;
1029 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
1030 }
1031
1032 /*
1033 * Routine: vm_page_create
1034 * Purpose:
1035 * After the VM system is up, machine-dependent code
1036 * may stumble across more physical memory. For example,
1037 * memory that it was reserving for a frame buffer.
1038 * vm_page_create turns this memory into available pages.
1039 */
1040
1041 void
1042 vm_page_create(
1043 ppnum_t start,
1044 ppnum_t end)
1045 {
1046 ppnum_t phys_page;
1047 vm_page_t m;
1048
1049 for (phys_page = start;
1050 phys_page < end;
1051 phys_page++) {
1052 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1053 == VM_PAGE_NULL)
1054 vm_page_more_fictitious();
1055
1056 m->fictitious = FALSE;
1057 pmap_clear_noencrypt(phys_page);
1058
1059 vm_page_pages++;
1060 vm_page_release(m);
1061 }
1062 }
1063
1064 /*
1065 * vm_page_hash:
1066 *
1067 * Distributes the object/offset key pair among hash buckets.
1068 *
1069 * NOTE: The bucket count must be a power of 2
1070 */
1071 #define vm_page_hash(object, offset) (\
1072 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1073 & vm_page_hash_mask)
1074
1075
1076 /*
1077 * vm_page_insert: [ internal use only ]
1078 *
1079 * Inserts the given mem entry into the object/object-page
1080 * table and object list.
1081 *
1082 * The object must be locked.
1083 */
1084 void
1085 vm_page_insert(
1086 vm_page_t mem,
1087 vm_object_t object,
1088 vm_object_offset_t offset)
1089 {
1090 vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
1091 }
1092
1093 void
1094 vm_page_insert_internal(
1095 vm_page_t mem,
1096 vm_object_t object,
1097 vm_object_offset_t offset,
1098 boolean_t queues_lock_held,
1099 boolean_t insert_in_hash,
1100 boolean_t batch_pmap_op)
1101 {
1102 vm_page_bucket_t *bucket;
1103 lck_spin_t *bucket_lock;
1104 int hash_id;
1105 task_t owner;
1106
1107 XPR(XPR_VM_PAGE,
1108 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1109 object, offset, mem, 0,0);
1110 #if 0
1111 /*
1112 * we may not hold the page queue lock
1113 * so this check isn't safe to make
1114 */
1115 VM_PAGE_CHECK(mem);
1116 #endif
1117
1118 assert(page_aligned(offset));
1119
1120 /* the vm_submap_object is only a placeholder for submaps */
1121 assert(object != vm_submap_object);
1122
1123 vm_object_lock_assert_exclusive(object);
1124 #if DEBUG
1125 lck_mtx_assert(&vm_page_queue_lock,
1126 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1127 : LCK_MTX_ASSERT_NOTOWNED);
1128 #endif /* DEBUG */
1129
1130 if (insert_in_hash == TRUE) {
1131 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1132 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1133 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1134 "already in (obj=%p,off=0x%llx)",
1135 mem, object, offset, mem->object, mem->offset);
1136 #endif
1137 assert(!object->internal || offset < object->vo_size);
1138
1139 /* only insert "pageout" pages into "pageout" objects,
1140 * and normal pages into normal objects */
1141 assert(object->pageout == mem->pageout);
1142
1143 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1144
1145 /*
1146 * Record the object/offset pair in this page
1147 */
1148
1149 mem->object = object;
1150 mem->offset = offset;
1151
1152 /*
1153 * Insert it into the object_object/offset hash table
1154 */
1155 hash_id = vm_page_hash(object, offset);
1156 bucket = &vm_page_buckets[hash_id];
1157 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1158
1159 lck_spin_lock(bucket_lock);
1160
1161 mem->next_m = bucket->page_list;
1162 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1163 assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));
1164
1165 #if MACH_PAGE_HASH_STATS
1166 if (++bucket->cur_count > bucket->hi_count)
1167 bucket->hi_count = bucket->cur_count;
1168 #endif /* MACH_PAGE_HASH_STATS */
1169 mem->hashed = TRUE;
1170 lck_spin_unlock(bucket_lock);
1171 }
1172
1173 {
1174 unsigned int cache_attr;
1175
1176 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1177
1178 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1179 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1180 }
1181 }
1182 /*
1183 * Now link into the object's list of backed pages.
1184 */
1185 VM_PAGE_INSERT(mem, object);
1186 mem->tabled = TRUE;
1187
1188 /*
1189 * Show that the object has one more resident page.
1190 */
1191
1192 object->resident_page_count++;
1193 if (VM_PAGE_WIRED(mem)) {
1194 object->wired_page_count++;
1195 }
1196 assert(object->resident_page_count >= object->wired_page_count);
1197
1198 if (object->internal) {
1199 OSAddAtomic(1, &vm_page_internal_count);
1200 } else {
1201 OSAddAtomic(1, &vm_page_external_count);
1202 }
1203
1204 /*
1205 * It wouldn't make sense to insert a "reusable" page in
1206 * an object (the page would have been marked "reusable" only
1207 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1208 * in the object at that time).
1209 * But a page could be inserted in an "all_reusable" object, if
1210 * something faults it in (a vm_read() from another task or a
1211 * "use-after-free" issue in user space, for example). It can
1212 * also happen if we're relocating a page from that object to
1213 * a different physical page during a physically-contiguous
1214 * allocation.
1215 */
1216 assert(!mem->reusable);
1217 if (mem->object->all_reusable) {
1218 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1219 }
1220
1221 if (object->purgable == VM_PURGABLE_DENY) {
1222 owner = TASK_NULL;
1223 } else {
1224 owner = object->vo_purgeable_owner;
1225 }
1226 if (owner &&
1227 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1228 VM_PAGE_WIRED(mem))) {
1229 /* more non-volatile bytes */
1230 ledger_credit(owner->ledger,
1231 task_ledgers.purgeable_nonvolatile,
1232 PAGE_SIZE);
1233 /* more footprint */
1234 ledger_credit(owner->ledger,
1235 task_ledgers.phys_footprint,
1236 PAGE_SIZE);
1237
1238 } else if (owner &&
1239 (object->purgable == VM_PURGABLE_VOLATILE ||
1240 object->purgable == VM_PURGABLE_EMPTY)) {
1241 assert(! VM_PAGE_WIRED(mem));
1242 /* more volatile bytes */
1243 ledger_credit(owner->ledger,
1244 task_ledgers.purgeable_volatile,
1245 PAGE_SIZE);
1246 }
1247
1248 if (object->purgable == VM_PURGABLE_VOLATILE) {
1249 if (VM_PAGE_WIRED(mem)) {
1250 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1251 } else {
1252 OSAddAtomic(+1, &vm_page_purgeable_count);
1253 }
1254 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1255 mem->throttled) {
1256 /*
1257 * This page belongs to a purged VM object but hasn't
1258 * been purged (because it was "busy").
1259 * It's in the "throttled" queue and hence not
1260 * visible to vm_pageout_scan(). Move it to a pageable
1261 * queue, so that it can eventually be reclaimed, instead
1262 * of lingering in the "empty" object.
1263 */
1264 if (queues_lock_held == FALSE)
1265 vm_page_lockspin_queues();
1266 vm_page_deactivate(mem);
1267 if (queues_lock_held == FALSE)
1268 vm_page_unlock_queues();
1269 }
1270
1271 #if VM_OBJECT_TRACKING_OP_MODIFIED
1272 if (vm_object_tracking_inited &&
1273 object->internal &&
1274 object->resident_page_count == 0 &&
1275 object->pager == NULL &&
1276 object->shadow != NULL &&
1277 object->shadow->copy == object) {
1278 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1279 int numsaved = 0;
1280
1281 numsaved =OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1282 btlog_add_entry(vm_object_tracking_btlog,
1283 object,
1284 VM_OBJECT_TRACKING_OP_MODIFIED,
1285 bt,
1286 numsaved);
1287 }
1288 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1289 }
1290
1291 /*
1292 * vm_page_replace:
1293 *
1294 * Exactly like vm_page_insert, except that we first
1295 * remove any existing page at the given offset in object.
1296 *
1297 * The object must be locked.
1298 */
1299 void
1300 vm_page_replace(
1301 register vm_page_t mem,
1302 register vm_object_t object,
1303 register vm_object_offset_t offset)
1304 {
1305 vm_page_bucket_t *bucket;
1306 vm_page_t found_m = VM_PAGE_NULL;
1307 lck_spin_t *bucket_lock;
1308 int hash_id;
1309
1310 #if 0
1311 /*
1312 * we don't hold the page queue lock
1313 * so this check isn't safe to make
1314 */
1315 VM_PAGE_CHECK(mem);
1316 #endif
1317 vm_object_lock_assert_exclusive(object);
1318 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1319 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1320 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1321 "already in (obj=%p,off=0x%llx)",
1322 mem, object, offset, mem->object, mem->offset);
1323 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1324 #endif
1325 /*
1326 * Record the object/offset pair in this page
1327 */
1328
1329 mem->object = object;
1330 mem->offset = offset;
1331
1332 /*
1333 * Insert it into the object_object/offset hash table,
1334 * replacing any page that might have been there.
1335 */
1336
1337 hash_id = vm_page_hash(object, offset);
1338 bucket = &vm_page_buckets[hash_id];
1339 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1340
1341 lck_spin_lock(bucket_lock);
1342
1343 if (bucket->page_list) {
1344 vm_page_packed_t *mp = &bucket->page_list;
1345 vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);
1346
1347 do {
1348 if (m->object == object && m->offset == offset) {
1349 /*
1350 * Remove old page from hash list
1351 */
1352 *mp = m->next_m;
1353 m->hashed = FALSE;
1354
1355 found_m = m;
1356 break;
1357 }
1358 mp = &m->next_m;
1359 } while ((m = VM_PAGE_UNPACK_PTR(*mp)));
1360
1361 mem->next_m = bucket->page_list;
1362 } else {
1363 mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1364 }
1365 /*
1366 * insert new page at head of hash list
1367 */
1368 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1369 mem->hashed = TRUE;
1370
1371 lck_spin_unlock(bucket_lock);
1372
1373 if (found_m) {
1374 /*
1375 * there was already a page at the specified
1376 * offset for this object... remove it from
1377 * the object and free it back to the free list
1378 */
1379 vm_page_free_unlocked(found_m, FALSE);
1380 }
1381 vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1382 }
1383
1384 /*
1385 * vm_page_remove: [ internal use only ]
1386 *
1387 * Removes the given mem entry from the object/offset-page
1388 * table and the object page list.
1389 *
1390 * The object must be locked.
1391 */
1392
1393 void
1394 vm_page_remove(
1395 vm_page_t mem,
1396 boolean_t remove_from_hash)
1397 {
1398 vm_page_bucket_t *bucket;
1399 vm_page_t this;
1400 lck_spin_t *bucket_lock;
1401 int hash_id;
1402 task_t owner;
1403
1404 XPR(XPR_VM_PAGE,
1405 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1406 mem->object, mem->offset,
1407 mem, 0,0);
1408
1409 vm_object_lock_assert_exclusive(mem->object);
1410 assert(mem->tabled);
1411 assert(!mem->cleaning);
1412 assert(!mem->laundry);
1413 #if 0
1414 /*
1415 * we don't hold the page queue lock
1416 * so this check isn't safe to make
1417 */
1418 VM_PAGE_CHECK(mem);
1419 #endif
1420 if (remove_from_hash == TRUE) {
1421 /*
1422 * Remove from the object_object/offset hash table
1423 */
1424 hash_id = vm_page_hash(mem->object, mem->offset);
1425 bucket = &vm_page_buckets[hash_id];
1426 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1427
1428 lck_spin_lock(bucket_lock);
1429
1430 if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
1431 /* optimize for common case */
1432
1433 bucket->page_list = mem->next_m;
1434 } else {
1435 vm_page_packed_t *prev;
1436
1437 for (prev = &this->next_m;
1438 (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
1439 prev = &this->next_m)
1440 continue;
1441 *prev = this->next_m;
1442 }
1443 #if MACH_PAGE_HASH_STATS
1444 bucket->cur_count--;
1445 #endif /* MACH_PAGE_HASH_STATS */
1446 mem->hashed = FALSE;
1447 lck_spin_unlock(bucket_lock);
1448 }
1449 /*
1450 * Now remove from the object's list of backed pages.
1451 */
1452
1453 VM_PAGE_REMOVE(mem);
1454
1455 /*
1456 * And show that the object has one fewer resident
1457 * page.
1458 */
1459
1460 assert(mem->object->resident_page_count > 0);
1461 mem->object->resident_page_count--;
1462
1463 if (mem->object->internal) {
1464 #if DEBUG
1465 assert(vm_page_internal_count);
1466 #endif /* DEBUG */
1467
1468 OSAddAtomic(-1, &vm_page_internal_count);
1469 } else {
1470 assert(vm_page_external_count);
1471 OSAddAtomic(-1, &vm_page_external_count);
1472
1473 if (mem->xpmapped) {
1474 assert(vm_page_xpmapped_external_count);
1475 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1476 }
1477 }
1478 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1479 if (mem->object->resident_page_count == 0)
1480 vm_object_cache_remove(mem->object);
1481 }
1482
1483 if (VM_PAGE_WIRED(mem)) {
1484 assert(mem->object->wired_page_count > 0);
1485 mem->object->wired_page_count--;
1486 }
1487 assert(mem->object->resident_page_count >=
1488 mem->object->wired_page_count);
1489 if (mem->reusable) {
1490 assert(mem->object->reusable_page_count > 0);
1491 mem->object->reusable_page_count--;
1492 assert(mem->object->reusable_page_count <=
1493 mem->object->resident_page_count);
1494 mem->reusable = FALSE;
1495 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1496 vm_page_stats_reusable.reused_remove++;
1497 } else if (mem->object->all_reusable) {
1498 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1499 vm_page_stats_reusable.reused_remove++;
1500 }
1501
1502 if (mem->object->purgable == VM_PURGABLE_DENY) {
1503 owner = TASK_NULL;
1504 } else {
1505 owner = mem->object->vo_purgeable_owner;
1506 }
1507 if (owner &&
1508 (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
1509 VM_PAGE_WIRED(mem))) {
1510 /* less non-volatile bytes */
1511 ledger_debit(owner->ledger,
1512 task_ledgers.purgeable_nonvolatile,
1513 PAGE_SIZE);
1514 /* less footprint */
1515 ledger_debit(owner->ledger,
1516 task_ledgers.phys_footprint,
1517 PAGE_SIZE);
1518 } else if (owner &&
1519 (mem->object->purgable == VM_PURGABLE_VOLATILE ||
1520 mem->object->purgable == VM_PURGABLE_EMPTY)) {
1521 assert(! VM_PAGE_WIRED(mem));
1522 /* less volatile bytes */
1523 ledger_debit(owner->ledger,
1524 task_ledgers.purgeable_volatile,
1525 PAGE_SIZE);
1526 }
1527 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1528 if (VM_PAGE_WIRED(mem)) {
1529 assert(vm_page_purgeable_wired_count > 0);
1530 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1531 } else {
1532 assert(vm_page_purgeable_count > 0);
1533 OSAddAtomic(-1, &vm_page_purgeable_count);
1534 }
1535 }
1536 if (mem->object->set_cache_attr == TRUE)
1537 pmap_set_cache_attributes(mem->phys_page, 0);
1538
1539 mem->tabled = FALSE;
1540 mem->object = VM_OBJECT_NULL;
1541 mem->offset = (vm_object_offset_t) -1;
1542 }
1543
1544
1545 /*
1546 * vm_page_lookup:
1547 *
1548 * Returns the page associated with the object/offset
1549 * pair specified; if none is found, VM_PAGE_NULL is returned.
1550 *
1551 * The object must be locked. No side effects.
1552 */
1553
1554 unsigned long vm_page_lookup_hint = 0;
1555 unsigned long vm_page_lookup_hint_next = 0;
1556 unsigned long vm_page_lookup_hint_prev = 0;
1557 unsigned long vm_page_lookup_hint_miss = 0;
1558 unsigned long vm_page_lookup_bucket_NULL = 0;
1559 unsigned long vm_page_lookup_miss = 0;
1560
1561
1562 vm_page_t
1563 vm_page_lookup(
1564 vm_object_t object,
1565 vm_object_offset_t offset)
1566 {
1567 vm_page_t mem;
1568 vm_page_bucket_t *bucket;
1569 queue_entry_t qe;
1570 lck_spin_t *bucket_lock;
1571 int hash_id;
1572
1573 vm_object_lock_assert_held(object);
1574 mem = object->memq_hint;
1575
1576 if (mem != VM_PAGE_NULL) {
1577 assert(mem->object == object);
1578
1579 if (mem->offset == offset) {
1580 vm_page_lookup_hint++;
1581 return mem;
1582 }
1583 qe = queue_next(&mem->listq);
1584
1585 if (! queue_end(&object->memq, qe)) {
1586 vm_page_t next_page;
1587
1588 next_page = (vm_page_t) qe;
1589 assert(next_page->object == object);
1590
1591 if (next_page->offset == offset) {
1592 vm_page_lookup_hint_next++;
1593 object->memq_hint = next_page; /* new hint */
1594 return next_page;
1595 }
1596 }
1597 qe = queue_prev(&mem->listq);
1598
1599 if (! queue_end(&object->memq, qe)) {
1600 vm_page_t prev_page;
1601
1602 prev_page = (vm_page_t) qe;
1603 assert(prev_page->object == object);
1604
1605 if (prev_page->offset == offset) {
1606 vm_page_lookup_hint_prev++;
1607 object->memq_hint = prev_page; /* new hint */
1608 return prev_page;
1609 }
1610 }
1611 }
1612 /*
1613 * Search the hash table for this object/offset pair
1614 */
1615 hash_id = vm_page_hash(object, offset);
1616 bucket = &vm_page_buckets[hash_id];
1617
1618 /*
1619 * since we hold the object lock, we are guaranteed that no
1620 * new pages can be inserted into this object... this in turn
1621 * guarantees that the page we're looking for can't exist
1622 * if the bucket it hashes to is currently NULL even when looked
1623 * at outside the scope of the hash bucket lock... this is a
1624 * really cheap optimization to avoid taking the lock
1625 */
1626 if (!bucket->page_list) {
1627 vm_page_lookup_bucket_NULL++;
1628
1629 return (VM_PAGE_NULL);
1630 }
1631 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1632
1633 lck_spin_lock(bucket_lock);
1634
1635 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
1636 #if 0
1637 /*
1638 * we don't hold the page queue lock
1639 * so this check isn't safe to make
1640 */
1641 VM_PAGE_CHECK(mem);
1642 #endif
1643 if ((mem->object == object) && (mem->offset == offset))
1644 break;
1645 }
1646 lck_spin_unlock(bucket_lock);
1647
1648 if (mem != VM_PAGE_NULL) {
1649 if (object->memq_hint != VM_PAGE_NULL) {
1650 vm_page_lookup_hint_miss++;
1651 }
1652 assert(mem->object == object);
1653 object->memq_hint = mem;
1654 } else
1655 vm_page_lookup_miss++;
1656
1657 return(mem);
1658 }
1659
1660
1661 /*
1662 * vm_page_rename:
1663 *
1664 * Move the given memory entry from its
1665 * current object to the specified target object/offset.
1666 *
1667 * The object must be locked.
1668 */
1669 void
1670 vm_page_rename(
1671 register vm_page_t mem,
1672 register vm_object_t new_object,
1673 vm_object_offset_t new_offset,
1674 boolean_t encrypted_ok)
1675 {
1676 boolean_t internal_to_external, external_to_internal;
1677
1678 assert(mem->object != new_object);
1679
1680 /*
1681 * ENCRYPTED SWAP:
1682 * The encryption key is based on the page's memory object
1683 * (aka "pager") and paging offset. Moving the page to
1684 * another VM object changes its "pager" and "paging_offset"
1685 * so it has to be decrypted first, or we would lose the key.
1686 *
1687 * One exception is VM object collapsing, where we transfer pages
1688 * from one backing object to its parent object. This operation also
1689 * transfers the paging information, so the <pager,paging_offset> info
1690 * should remain consistent. The caller (vm_object_do_collapse())
1691 * sets "encrypted_ok" in this case.
1692 */
1693 if (!encrypted_ok && mem->encrypted) {
1694 panic("vm_page_rename: page %p is encrypted\n", mem);
1695 }
1696
1697 XPR(XPR_VM_PAGE,
1698 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1699 new_object, new_offset,
1700 mem, 0,0);
1701
1702 /*
1703 * Changes to mem->object require the page lock because
1704 * the pageout daemon uses that lock to get the object.
1705 */
1706 vm_page_lockspin_queues();
1707
1708 internal_to_external = FALSE;
1709 external_to_internal = FALSE;
1710
1711 if (mem->local) {
1712 /*
1713 * it's much easier to get the vm_page_pageable_xxx accounting correct
1714 * if we first move the page to the active queue... it's going to end
1715 * up there anyway, and we don't do vm_page_rename's frequently enough
1716 * for this to matter.
1717 */
1718 VM_PAGE_QUEUES_REMOVE(mem);
1719 vm_page_activate(mem);
1720 }
1721 if (mem->active || mem->inactive || mem->speculative) {
1722 if (mem->object->internal && !new_object->internal) {
1723 internal_to_external = TRUE;
1724 }
1725 if (!mem->object->internal && new_object->internal) {
1726 external_to_internal = TRUE;
1727 }
1728 }
1729
1730 vm_page_remove(mem, TRUE);
1731 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1732
1733 if (internal_to_external) {
1734 vm_page_pageable_internal_count--;
1735 vm_page_pageable_external_count++;
1736 } else if (external_to_internal) {
1737 vm_page_pageable_external_count--;
1738 vm_page_pageable_internal_count++;
1739 }
1740
1741 vm_page_unlock_queues();
1742 }
1743
1744 /*
1745 * vm_page_init:
1746 *
1747 * Initialize the fields in a new page.
1748 * This takes a structure with random values and initializes it
1749 * so that it can be given to vm_page_release or vm_page_insert.
1750 */
1751 void
1752 vm_page_init(
1753 vm_page_t mem,
1754 ppnum_t phys_page,
1755 boolean_t lopage)
1756 {
1757 assert(phys_page);
1758
1759 #if DEBUG
1760 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1761 if (!(pmap_valid_page(phys_page))) {
1762 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1763 }
1764 }
1765 #endif
1766 *mem = vm_page_template;
1767 mem->phys_page = phys_page;
1768 #if 0
1769 /*
1770 * we're leaving this turned off for now... currently pages
1771 * come off the free list and are either immediately dirtied/referenced
1772 * due to zero-fill or COW faults, or are used to read or write files...
1773 * in the file I/O case, the UPL mechanism takes care of clearing
1774 * the state of the HW ref/mod bits in a somewhat fragile way.
1775 * Since we may change the way this works in the future (to toughen it up),
1776 * I'm leaving this as a reminder of where these bits could get cleared
1777 */
1778
1779 /*
1780 * make sure both the h/w referenced and modified bits are
1781 * clear at this point... we are especially dependent on
1782 * not finding a 'stale' h/w modified in a number of spots
1783 * once this page goes back into use
1784 */
1785 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1786 #endif
1787 mem->lopage = lopage;
1788 }
1789
1790 /*
1791 * vm_page_grab_fictitious:
1792 *
1793 * Remove a fictitious page from the free list.
1794 * Returns VM_PAGE_NULL if there are no free pages.
1795 */
1796 int c_vm_page_grab_fictitious = 0;
1797 int c_vm_page_grab_fictitious_failed = 0;
1798 int c_vm_page_release_fictitious = 0;
1799 int c_vm_page_more_fictitious = 0;
1800
1801 vm_page_t
1802 vm_page_grab_fictitious_common(
1803 ppnum_t phys_addr)
1804 {
1805 vm_page_t m;
1806
1807 if ((m = (vm_page_t)zget(vm_page_zone))) {
1808
1809 vm_page_init(m, phys_addr, FALSE);
1810 m->fictitious = TRUE;
1811
1812 c_vm_page_grab_fictitious++;
1813 } else
1814 c_vm_page_grab_fictitious_failed++;
1815
1816 return m;
1817 }
1818
1819 vm_page_t
1820 vm_page_grab_fictitious(void)
1821 {
1822 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1823 }
1824
1825 vm_page_t
1826 vm_page_grab_guard(void)
1827 {
1828 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1829 }
1830
1831
1832 /*
1833 * vm_page_release_fictitious:
1834 *
1835 * Release a fictitious page to the zone pool
1836 */
1837 void
1838 vm_page_release_fictitious(
1839 vm_page_t m)
1840 {
1841 assert(!m->free);
1842 assert(m->fictitious);
1843 assert(m->phys_page == vm_page_fictitious_addr ||
1844 m->phys_page == vm_page_guard_addr);
1845
1846 c_vm_page_release_fictitious++;
1847
1848 zfree(vm_page_zone, m);
1849 }
1850
1851 /*
1852 * vm_page_more_fictitious:
1853 *
1854 * Add more fictitious pages to the zone.
1855 * Allowed to block. This routine is way intimate
1856 * with the zones code, for several reasons:
1857 * 1. we need to carve some page structures out of physical
1858 * memory before zones work, so they _cannot_ come from
1859 * the zone_map.
1860 * 2. the zone needs to be collectable in order to prevent
1861 * growth without bound. These structures are used by
1862 * the device pager (by the hundreds and thousands), as
1863 * private pages for pageout, and as blocking pages for
1864 * pagein. Temporary bursts in demand should not result in
1865 * permanent allocation of a resource.
1866 * 3. To smooth allocation humps, we allocate single pages
1867 * with kernel_memory_allocate(), and cram them into the
1868 * zone.
1869 */
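/*
 * NOTE (illustrative): a typical caller is expected to loop, e.g.
 *
 *	while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL)
 *		vm_page_more_fictitious();
 *
 * which is why the "> 5" check below must cover the largest number
 * of fictitious pages any single caller can already be holding.
 */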
1870
1871 void vm_page_more_fictitious(void)
1872 {
1873 vm_offset_t addr;
1874 kern_return_t retval;
1875
1876 c_vm_page_more_fictitious++;
1877
1878 /*
1879 * Allocate a single page from the zone_map. Do not wait if no physical
1880 * pages are immediately available, and do not zero the space. We need
1881 * our own blocking lock here to prevent multiple simultaneous
1882 * requests from piling up on the zone_map lock. Exactly
1883 * one (of our) threads should be potentially waiting on the map lock.
1884 * If the winner is not vm-privileged, then the page allocation will fail,
1885 * and it will temporarily block below in vm_page_wait().
1886 */
1887 lck_mtx_lock(&vm_page_alloc_lock);
1888 /*
1889 * If another thread allocated space, just bail out now.
1890 */
1891 if (zone_free_count(vm_page_zone) > 5) {
1892 /*
1893 * The number "5" is a small number that is larger than the
1894 * number of fictitious pages that any single caller will
1895 * attempt to allocate. Otherwise, a thread will attempt to
1896 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1897 * release all of the resources and locks already acquired,
1898 * and then call this routine. This routine finds the pages
1899 * that the caller released, so fails to allocate new space.
1900 * The process repeats infinitely. The largest known number
1901 * of fictitious pages required in this manner is 2. 5 is
1902 * simply a somewhat larger number.
1903 */
1904 lck_mtx_unlock(&vm_page_alloc_lock);
1905 return;
1906 }
1907
1908 retval = kernel_memory_allocate(zone_map,
1909 &addr, PAGE_SIZE, VM_PROT_ALL,
1910 KMA_KOBJECT|KMA_NOPAGEWAIT);
1911 if (retval != KERN_SUCCESS) {
1912 /*
1913 * No page was available. Drop the
1914 * lock to give another thread a chance at it, and
1915 * wait for the pageout daemon to make progress.
1916 */
1917 lck_mtx_unlock(&vm_page_alloc_lock);
1918 vm_page_wait(THREAD_UNINT);
1919 return;
1920 }
1921
1922 /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
1923 OSAddAtomic64(1, &(vm_page_zone->page_count));
1924
1925 zcram(vm_page_zone, addr, PAGE_SIZE);
1926
1927 lck_mtx_unlock(&vm_page_alloc_lock);
1928 }
1929
1930
1931 /*
1932 * vm_pool_low():
1933 *
1934 * Return true if it is not likely that a non-vm_privileged thread
1935 * can get memory without blocking. Advisory only, since the
1936 * situation may change under us.
1937 */
1938 int
1939 vm_pool_low(void)
1940 {
1941 /* No locking, at worst we will fib. */
1942 return( vm_page_free_count <= vm_page_free_reserved );
1943 }
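/*
 * NOTE: since vm_pool_low() is advisory only, callers should treat
 * it strictly as a hint (e.g. fall back to a smaller or non-blocking
 * allocation) because the free count can change as soon as we return.
 */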
1944
1945
1946
1947 /*
1948 * this is an interface to support bring-up of drivers
1949 * on platforms with physical memory > 4G...
1950 */
1951 int vm_himemory_mode = 2;
1952
1953
1954 /*
1955 * this interface exists to support hardware controllers
1956 * incapable of generating DMAs with more than 32 bits
1957 * of address on platforms with physical memory > 4G...
1958 */
1959 unsigned int vm_lopages_allocated_q = 0;
1960 unsigned int vm_lopages_allocated_cpm_success = 0;
1961 unsigned int vm_lopages_allocated_cpm_failed = 0;
1962 queue_head_t vm_lopage_queue_free;
1963
1964 vm_page_t
1965 vm_page_grablo(void)
1966 {
1967 vm_page_t mem;
1968
1969 if (vm_lopage_needed == FALSE)
1970 return (vm_page_grab());
1971
1972 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1973
1974 if ( !queue_empty(&vm_lopage_queue_free)) {
1975 queue_remove_first(&vm_lopage_queue_free,
1976 mem,
1977 vm_page_t,
1978 pageq);
1979 assert(vm_lopage_free_count);
1980
1981 vm_lopage_free_count--;
1982 vm_lopages_allocated_q++;
1983
1984 if (vm_lopage_free_count < vm_lopage_lowater)
1985 vm_lopage_refill = TRUE;
1986
1987 lck_mtx_unlock(&vm_page_queue_free_lock);
1988 } else {
1989 lck_mtx_unlock(&vm_page_queue_free_lock);
1990
1991 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1992
1993 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1994 vm_lopages_allocated_cpm_failed++;
1995 lck_mtx_unlock(&vm_page_queue_free_lock);
1996
1997 return (VM_PAGE_NULL);
1998 }
1999 mem->busy = TRUE;
2000
2001 vm_page_lockspin_queues();
2002
2003 mem->gobbled = FALSE;
2004 vm_page_gobble_count--;
2005 vm_page_wire_count--;
2006
2007 vm_lopages_allocated_cpm_success++;
2008 vm_page_unlock_queues();
2009 }
2010 assert(mem->busy);
2011 assert(!mem->free);
2012 assert(!mem->pmapped);
2013 assert(!mem->wpmapped);
2014 assert(!pmap_is_noencrypt(mem->phys_page));
2015
2016 mem->pageq.next = NULL;
2017 mem->pageq.prev = NULL;
2018
2019 return (mem);
2020 }
2021
2022
2023 /*
2024 * vm_page_grab:
2025 *
2026 * first try to grab a page from the per-cpu free list...
2027 * this must be done while pre-emption is disabled... if
2028 * a page is available, we're done...
2029 * if no page is available, grab the vm_page_queue_free_lock
2030 * and see if current number of free pages would allow us
2031 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2032 * if there are pages available, disable preemption and
2033 * recheck the state of the per-cpu free list... we could
2034 * have been preempted and moved to a different cpu, or
2035 * some other thread could have re-filled it... if still
2036 * empty, figure out how many pages we can steal from the
2037 * global free queue and move to the per-cpu queue...
2038 * return 1 of these pages when done... only wakeup the
2039 * pageout_scan thread if we moved pages from the global
2040 * list... no need for the wakeup if we've satisfied the
2041 * request from the per-cpu queue.
2042 */
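/*
 * NOTE: the global free list is really an array of queues,
 * vm_page_queue_free[], indexed by page "color" (phys_page &
 * vm_color_mask), i.e. cache coloring.  Pages are handed out
 * round-robin across the colors (see the 'color' / 'start_color'
 * handling below), which spreads successive allocations across
 * the colors.
 */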
2043
2044
2045 vm_page_t
2046 vm_page_grab( void )
2047 {
2048 vm_page_t mem;
2049
2050
2051 disable_preemption();
2052
2053 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2054 return_page_from_cpu_list:
2055 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2056 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
2057
2058 enable_preemption();
2059 mem->pageq.next = NULL;
2060
2061 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2062 assert(mem->tabled == FALSE);
2063 assert(mem->object == VM_OBJECT_NULL);
2064 assert(!mem->laundry);
2065 assert(!mem->free);
2066 assert(pmap_verify_free(mem->phys_page));
2067 assert(mem->busy);
2068 assert(!mem->encrypted);
2069 assert(!mem->pmapped);
2070 assert(!mem->wpmapped);
2071 assert(!mem->active);
2072 assert(!mem->inactive);
2073 assert(!mem->throttled);
2074 assert(!mem->speculative);
2075 assert(!pmap_is_noencrypt(mem->phys_page));
2076
2077 return mem;
2078 }
2079 enable_preemption();
2080
2081
2082 /*
2083 * Optionally produce warnings if the wire or gobble
2084 * counts exceed some threshold.
2085 */
2086 #if VM_PAGE_WIRE_COUNT_WARNING
2087 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2088 printf("mk: vm_page_grab(): high wired page count of %d\n",
2089 vm_page_wire_count);
2090 }
2091 #endif
2092 #if VM_PAGE_GOBBLE_COUNT_WARNING
2093 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2094 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2095 vm_page_gobble_count);
2096 }
2097 #endif
2098 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2099
2100 /*
2101 * Only let privileged threads (involved in pageout)
2102 * dip into the reserved pool.
2103 */
2104 if ((vm_page_free_count < vm_page_free_reserved) &&
2105 !(current_thread()->options & TH_OPT_VMPRIV)) {
2106 lck_mtx_unlock(&vm_page_queue_free_lock);
2107 mem = VM_PAGE_NULL;
2108 }
2109 else {
2110 vm_page_t head;
2111 vm_page_t tail;
2112 unsigned int pages_to_steal;
2113 unsigned int color;
2114
2115 while ( vm_page_free_count == 0 ) {
2116
2117 lck_mtx_unlock(&vm_page_queue_free_lock);
2118 /*
2119 * must be a privileged thread to be
2120 * in this state since a non-privileged
2121 * thread would have bailed if we were
2122 * under the vm_page_free_reserved mark
2123 */
2124 VM_PAGE_WAIT();
2125 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2126 }
2127
2128 disable_preemption();
2129
2130 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2131 lck_mtx_unlock(&vm_page_queue_free_lock);
2132
2133 /*
2134 * we got preempted and moved to another processor
2135 * or we got preempted and someone else ran and filled the cache
2136 */
2137 goto return_page_from_cpu_list;
2138 }
2139 if (vm_page_free_count <= vm_page_free_reserved)
2140 pages_to_steal = 1;
2141 else {
2142 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2143 pages_to_steal = vm_free_magazine_refill_limit;
2144 else
2145 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2146 }
2147 color = PROCESSOR_DATA(current_processor(), start_color);
2148 head = tail = NULL;
2149
2150 vm_page_free_count -= pages_to_steal;
2151
2152 while (pages_to_steal--) {
2153
2154 while (queue_empty(&vm_page_queue_free[color]))
2155 color = (color + 1) & vm_color_mask;
2156
2157 queue_remove_first(&vm_page_queue_free[color],
2158 mem,
2159 vm_page_t,
2160 pageq);
2161 mem->pageq.next = NULL;
2162 mem->pageq.prev = NULL;
2163
2164 assert(!mem->active);
2165 assert(!mem->inactive);
2166 assert(!mem->throttled);
2167 assert(!mem->speculative);
2168
2169 color = (color + 1) & vm_color_mask;
2170
2171 if (head == NULL)
2172 head = mem;
2173 else
2174 tail->pageq.next = (queue_t)mem;
2175 tail = mem;
2176
2177 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2178 assert(mem->tabled == FALSE);
2179 assert(mem->object == VM_OBJECT_NULL);
2180 assert(!mem->laundry);
2181 assert(mem->free);
2182 mem->free = FALSE;
2183
2184 assert(pmap_verify_free(mem->phys_page));
2185 assert(mem->busy);
2186 assert(!mem->free);
2187 assert(!mem->encrypted);
2188 assert(!mem->pmapped);
2189 assert(!mem->wpmapped);
2190 assert(!pmap_is_noencrypt(mem->phys_page));
2191 }
2192 lck_mtx_unlock(&vm_page_queue_free_lock);
2193
2194 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2195 PROCESSOR_DATA(current_processor(), start_color) = color;
2196
2197 /*
2198 * satisfy this request
2199 */
2200 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2201 mem = head;
2202 mem->pageq.next = NULL;
2203
2204 enable_preemption();
2205 }
2206 /*
2207 * Decide if we should poke the pageout daemon.
2208 * We do this if the free count is less than the low
2209 * water mark, or if the free count is less than the high
2210 * water mark (but above the low water mark) and the inactive
2211 * count is less than its target.
2212 *
2213 * We don't have the counts locked ... if they change a little,
2214 * it doesn't really matter.
2215 */
2216 if ((vm_page_free_count < vm_page_free_min) ||
2217 ((vm_page_free_count < vm_page_free_target) &&
2218 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2219 thread_wakeup((event_t) &vm_page_free_wanted);
2220
2221 VM_CHECK_MEMORYSTATUS;
2222
2223 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2224
2225 return mem;
2226 }
2227
2228 /*
2229 * vm_page_release:
2230 *
2231 * Return a page to the free list.
2232 */
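/*
 * NOTE: as the asserts below spell out, the page must be busy,
 * must not be private or fictitious, must not already be free,
 * and must not be on any queue or in any object.  The free-queue
 * lock is taken here, so the caller must not already hold it.
 */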
2233
2234 void
2235 vm_page_release(
2236 register vm_page_t mem)
2237 {
2238 unsigned int color;
2239 int need_wakeup = 0;
2240 int need_priv_wakeup = 0;
2241
2242
2243 assert(!mem->private && !mem->fictitious);
2244 if (vm_page_free_verify) {
2245 assert(pmap_verify_free(mem->phys_page));
2246 }
2247 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2248
2249 pmap_clear_noencrypt(mem->phys_page);
2250
2251 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2252 #if DEBUG
2253 if (mem->free)
2254 panic("vm_page_release");
2255 #endif
2256
2257 assert(mem->busy);
2258 assert(!mem->laundry);
2259 assert(mem->object == VM_OBJECT_NULL);
2260 assert(mem->pageq.next == NULL &&
2261 mem->pageq.prev == NULL);
2262 assert(mem->listq.next == NULL &&
2263 mem->listq.prev == NULL);
2264
2265 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2266 vm_lopage_free_count < vm_lopage_free_limit &&
2267 mem->phys_page < max_valid_low_ppnum) {
2268 /*
2269 * this exists to support hardware controllers
2270 * incapable of generating DMAs with more than 32 bits
2271 * of address on platforms with physical memory > 4G...
2272 */
2273 queue_enter_first(&vm_lopage_queue_free,
2274 mem,
2275 vm_page_t,
2276 pageq);
2277 vm_lopage_free_count++;
2278
2279 if (vm_lopage_free_count >= vm_lopage_free_limit)
2280 vm_lopage_refill = FALSE;
2281
2282 mem->lopage = TRUE;
2283 } else {
2284 mem->lopage = FALSE;
2285 mem->free = TRUE;
2286
2287 color = mem->phys_page & vm_color_mask;
2288 queue_enter_first(&vm_page_queue_free[color],
2289 mem,
2290 vm_page_t,
2291 pageq);
2292 vm_page_free_count++;
2293 /*
2294 * Check if we should wake up someone waiting for a page.
2295 * But don't bother waking them unless they can allocate.
2296 *
2297 * We wakeup only one thread, to prevent starvation.
2298 * Because the scheduling system handles wait queues FIFO,
2299 * if we wake up all waiting threads, one greedy thread
2300 * can starve multiple well-behaved threads. When the threads
2301 * all wake up, the greedy thread runs first, grabs the page,
2302 * and waits for another page. It will be the first to run
2303 * when the next page is freed.
2304 *
2305 * However, there is a slight danger here.
2306 * The thread we wake might not use the free page.
2307 * Then the other threads could wait indefinitely
2308 * while the page goes unused. To forestall this,
2309 * the pageout daemon will keep making free pages
2310 * as long as vm_page_free_wanted is non-zero.
2311 */
2312
2313 assert(vm_page_free_count > 0);
2314 if (vm_page_free_wanted_privileged > 0) {
2315 vm_page_free_wanted_privileged--;
2316 need_priv_wakeup = 1;
2317 } else if (vm_page_free_wanted > 0 &&
2318 vm_page_free_count > vm_page_free_reserved) {
2319 vm_page_free_wanted--;
2320 need_wakeup = 1;
2321 }
2322 }
2323 lck_mtx_unlock(&vm_page_queue_free_lock);
2324
2325 if (need_priv_wakeup)
2326 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2327 else if (need_wakeup)
2328 thread_wakeup_one((event_t) &vm_page_free_count);
2329
2330 VM_CHECK_MEMORYSTATUS;
2331 }
2332
2333 /*
2334 * This version of vm_page_release() is used only at startup
2335 * when we are single-threaded and pages are being released
2336 * for the first time. Hence, no locking is done and unnecessary checks are skipped.
2337 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
2338 */
2339 void
2340 vm_page_release_startup(
2341 register vm_page_t mem)
2342 {
2343 queue_t queue_free;
2344
2345 if (vm_lopage_free_count < vm_lopage_free_limit &&
2346 mem->phys_page < max_valid_low_ppnum) {
2347 mem->lopage = TRUE;
2348 vm_lopage_free_count++;
2349 queue_free = &vm_lopage_queue_free;
2350 } else {
2351 mem->lopage = FALSE;
2352 mem->free = TRUE;
2353 vm_page_free_count++;
2354 queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
2355 }
2356 queue_enter_first(queue_free, mem, vm_page_t, pageq);
2357 }
2358
2359 /*
2360 * vm_page_wait:
2361 *
2362 * Wait for a page to become available.
2363 * If there are plenty of free pages, then we don't sleep.
2364 *
2365 * Returns:
2366 * TRUE: There may be another page, try again
2367 * FALSE: We were interrupted out of our wait, don't try again
2368 */
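/*
 * NOTE (illustrative): the TRUE/FALSE contract above is meant to
 * support a caller loop along the lines of
 *
 *	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
 *		if (!vm_page_wait(interruptible))
 *			break;		[interrupted, give up]
 *	}
 *
 * (compare the fallback path in vm_page_part_zero_fill() below).
 */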
2369
2370 boolean_t
2371 vm_page_wait(
2372 int interruptible )
2373 {
2374 /*
2375 * We can't use vm_page_free_reserved to make this
2376 * determination. Consider: some thread might
2377 * need to allocate two pages. The first allocation
2378 * succeeds, the second fails. After the first page is freed,
2379 * a call to vm_page_wait must really block.
2380 */
2381 kern_return_t wait_result;
2382 int need_wakeup = 0;
2383 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2384
2385 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2386
2387 if (is_privileged && vm_page_free_count) {
2388 lck_mtx_unlock(&vm_page_queue_free_lock);
2389 return TRUE;
2390 }
2391 if (vm_page_free_count < vm_page_free_target) {
2392
2393 if (is_privileged) {
2394 if (vm_page_free_wanted_privileged++ == 0)
2395 need_wakeup = 1;
2396 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2397 } else {
2398 if (vm_page_free_wanted++ == 0)
2399 need_wakeup = 1;
2400 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2401 }
2402 lck_mtx_unlock(&vm_page_queue_free_lock);
2403 counter(c_vm_page_wait_block++);
2404
2405 if (need_wakeup)
2406 thread_wakeup((event_t)&vm_page_free_wanted);
2407
2408 if (wait_result == THREAD_WAITING) {
2409 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2410 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
2411 wait_result = thread_block(THREAD_CONTINUE_NULL);
2412 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2413 }
2414
2415 return(wait_result == THREAD_AWAKENED);
2416 } else {
2417 lck_mtx_unlock(&vm_page_queue_free_lock);
2418 return TRUE;
2419 }
2420 }
2421
2422 /*
2423 * vm_page_alloc:
2424 *
2425 * Allocate and return a memory cell associated
2426 * with this VM object/offset pair.
2427 *
2428 * Object must be locked.
2429 */
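/*
 * NOTE (illustrative): a typical caller holds the object lock
 * exclusively and, on a VM_PAGE_NULL return, drops it before
 * blocking for a free page, e.g.
 *
 *	vm_object_lock(object);
 *	while ((m = vm_page_alloc(object, offset)) == VM_PAGE_NULL) {
 *		vm_object_unlock(object);
 *		VM_PAGE_WAIT();
 *		vm_object_lock(object);
 *	}
 */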
2430
2431 vm_page_t
2432 vm_page_alloc(
2433 vm_object_t object,
2434 vm_object_offset_t offset)
2435 {
2436 register vm_page_t mem;
2437
2438 vm_object_lock_assert_exclusive(object);
2439 mem = vm_page_grab();
2440 if (mem == VM_PAGE_NULL)
2441 return VM_PAGE_NULL;
2442
2443 vm_page_insert(mem, object, offset);
2444
2445 return(mem);
2446 }
2447
2448 vm_page_t
2449 vm_page_alloclo(
2450 vm_object_t object,
2451 vm_object_offset_t offset)
2452 {
2453 register vm_page_t mem;
2454
2455 vm_object_lock_assert_exclusive(object);
2456 mem = vm_page_grablo();
2457 if (mem == VM_PAGE_NULL)
2458 return VM_PAGE_NULL;
2459
2460 vm_page_insert(mem, object, offset);
2461
2462 return(mem);
2463 }
2464
2465
2466 /*
2467 * vm_page_alloc_guard:
2468 *
2469 * Allocate a fictitious page which will be used
2470 * as a guard page. The page will be inserted into
2471 * the object and returned to the caller.
2472 */
2473
2474 vm_page_t
2475 vm_page_alloc_guard(
2476 vm_object_t object,
2477 vm_object_offset_t offset)
2478 {
2479 register vm_page_t mem;
2480
2481 vm_object_lock_assert_exclusive(object);
2482 mem = vm_page_grab_guard();
2483 if (mem == VM_PAGE_NULL)
2484 return VM_PAGE_NULL;
2485
2486 vm_page_insert(mem, object, offset);
2487
2488 return(mem);
2489 }
2490
2491
2492 counter(unsigned int c_laundry_pages_freed = 0;)
2493
2494 /*
2495 * vm_page_free_prepare:
2496 *
2497 * Removes page from any queue it may be on
2498 * and disassociates it from its VM object.
2499 *
2500 * Object and page queues must be locked prior to entry.
2501 */
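/*
 * NOTE: the work is split into a "queues" half and an "object" half
 * so that callers which batch frees (vm_page_free_unlocked(),
 * vm_page_free_list()) can do each half while holding only the lock
 * that half actually needs.
 */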
2502 static void
2503 vm_page_free_prepare(
2504 vm_page_t mem)
2505 {
2506 vm_page_free_prepare_queues(mem);
2507 vm_page_free_prepare_object(mem, TRUE);
2508 }
2509
2510
2511 void
2512 vm_page_free_prepare_queues(
2513 vm_page_t mem)
2514 {
2515 VM_PAGE_CHECK(mem);
2516 assert(!mem->free);
2517 assert(!mem->cleaning);
2518
2519 #if MACH_ASSERT || DEBUG
2520 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2521 if (mem->free)
2522 panic("vm_page_free: freeing page on free list\n");
2523 #endif /* MACH_ASSERT || DEBUG */
2524 if (mem->object) {
2525 vm_object_lock_assert_exclusive(mem->object);
2526 }
2527 if (mem->laundry) {
2528 /*
2529 * We may have to free a page while it's being laundered
2530 * if we lost its pager (due to a forced unmount, for example).
2531 * We need to call vm_pageout_steal_laundry() before removing
2532 * the page from its VM object, so that we can remove it
2533 * from its pageout queue and adjust the laundry accounting
2534 */
2535 vm_pageout_steal_laundry(mem, TRUE);
2536 counter(++c_laundry_pages_freed);
2537 }
2538
2539 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2540
2541 if (VM_PAGE_WIRED(mem)) {
2542 if (mem->object) {
2543 assert(mem->object->wired_page_count > 0);
2544 mem->object->wired_page_count--;
2545 assert(mem->object->resident_page_count >=
2546 mem->object->wired_page_count);
2547
2548 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2549 OSAddAtomic(+1, &vm_page_purgeable_count);
2550 assert(vm_page_purgeable_wired_count > 0);
2551 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2552 }
2553 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2554 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2555 mem->object->vo_purgeable_owner != TASK_NULL) {
2556 task_t owner;
2557
2558 owner = mem->object->vo_purgeable_owner;
2559 /*
2560 * While wired, this page was accounted
2561 * as "non-volatile" but it should now
2562 * be accounted as "volatile".
2563 */
2564 /* one less "non-volatile"... */
2565 ledger_debit(owner->ledger,
2566 task_ledgers.purgeable_nonvolatile,
2567 PAGE_SIZE);
2568 /* ... and "phys_footprint" */
2569 ledger_debit(owner->ledger,
2570 task_ledgers.phys_footprint,
2571 PAGE_SIZE);
2572 /* one more "volatile" */
2573 ledger_credit(owner->ledger,
2574 task_ledgers.purgeable_volatile,
2575 PAGE_SIZE);
2576 }
2577 }
2578 if (!mem->private && !mem->fictitious)
2579 vm_page_wire_count--;
2580 mem->wire_count = 0;
2581 assert(!mem->gobbled);
2582 } else if (mem->gobbled) {
2583 if (!mem->private && !mem->fictitious)
2584 vm_page_wire_count--;
2585 vm_page_gobble_count--;
2586 }
2587 }
2588
2589
2590 void
2591 vm_page_free_prepare_object(
2592 vm_page_t mem,
2593 boolean_t remove_from_hash)
2594 {
2595 if (mem->tabled)
2596 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2597
2598 PAGE_WAKEUP(mem); /* clears wanted */
2599
2600 if (mem->private) {
2601 mem->private = FALSE;
2602 mem->fictitious = TRUE;
2603 mem->phys_page = vm_page_fictitious_addr;
2604 }
2605 if ( !mem->fictitious) {
2606 vm_page_init(mem, mem->phys_page, mem->lopage);
2607 }
2608 }
2609
2610
2611 /*
2612 * vm_page_free:
2613 *
2614 * Returns the given page to the free list,
2615 * disassociating it with any VM object.
2616 *
2617 * Object and page queues must be locked prior to entry.
2618 */
2619 void
2620 vm_page_free(
2621 vm_page_t mem)
2622 {
2623 vm_page_free_prepare(mem);
2624
2625 if (mem->fictitious) {
2626 vm_page_release_fictitious(mem);
2627 } else {
2628 vm_page_release(mem);
2629 }
2630 }
2631
2632
2633 void
2634 vm_page_free_unlocked(
2635 vm_page_t mem,
2636 boolean_t remove_from_hash)
2637 {
2638 vm_page_lockspin_queues();
2639 vm_page_free_prepare_queues(mem);
2640 vm_page_unlock_queues();
2641
2642 vm_page_free_prepare_object(mem, remove_from_hash);
2643
2644 if (mem->fictitious) {
2645 vm_page_release_fictitious(mem);
2646 } else {
2647 vm_page_release(mem);
2648 }
2649 }
2650
2651
2652 /*
2653 * Free a list of pages. The list can be up to several hundred pages,
2654 * as batched up by vm_pageout_scan().
2655 * The big win is not having to take the free list lock once
2656 * per page.
2657 */
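/*
 * NOTE: 'freeq' is a singly linked list chained through pageq.next.
 * The pages are expected to already be off the paging queues and
 * unwired (see the asserts below); 'prepare_object' says whether the
 * object half of the teardown still needs to be done here.
 */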
2658 void
2659 vm_page_free_list(
2660 vm_page_t freeq,
2661 boolean_t prepare_object)
2662 {
2663 vm_page_t mem;
2664 vm_page_t nxt;
2665 vm_page_t local_freeq;
2666 int pg_count;
2667
2668 while (freeq) {
2669
2670 pg_count = 0;
2671 local_freeq = VM_PAGE_NULL;
2672 mem = freeq;
2673
2674 /*
2675 * break up the processing into smaller chunks so
2676 * that we can 'pipeline' the pages onto the
2677 * free list w/o introducing too much
2678 * contention on the global free queue lock
2679 */
2680 while (mem && pg_count < 64) {
2681
2682 assert(!mem->inactive);
2683 assert(!mem->active);
2684 assert(!mem->throttled);
2685 assert(!mem->free);
2686 assert(!mem->speculative);
2687 assert(!VM_PAGE_WIRED(mem));
2688 assert(mem->pageq.prev == NULL);
2689
2690 nxt = (vm_page_t)(mem->pageq.next);
2691
2692 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2693 assert(pmap_verify_free(mem->phys_page));
2694 }
2695 if (prepare_object == TRUE)
2696 vm_page_free_prepare_object(mem, TRUE);
2697
2698 if (!mem->fictitious) {
2699 assert(mem->busy);
2700
2701 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2702 vm_lopage_free_count < vm_lopage_free_limit &&
2703 mem->phys_page < max_valid_low_ppnum) {
2704 mem->pageq.next = NULL;
2705 vm_page_release(mem);
2706 } else {
2707 /*
2708 * IMPORTANT: we can't set the page "free" here
2709 * because that would make the page eligible for
2710 * a physically-contiguous allocation (see
2711 * vm_page_find_contiguous()) right away (we don't
2712 * hold the vm_page_queue_free lock). That would
2713 * cause trouble because the page is not actually
2714 * in the free queue yet...
2715 */
2716 mem->pageq.next = (queue_entry_t)local_freeq;
2717 local_freeq = mem;
2718 pg_count++;
2719
2720 pmap_clear_noencrypt(mem->phys_page);
2721 }
2722 } else {
2723 assert(mem->phys_page == vm_page_fictitious_addr ||
2724 mem->phys_page == vm_page_guard_addr);
2725 vm_page_release_fictitious(mem);
2726 }
2727 mem = nxt;
2728 }
2729 freeq = mem;
2730
2731 if ( (mem = local_freeq) ) {
2732 unsigned int avail_free_count;
2733 unsigned int need_wakeup = 0;
2734 unsigned int need_priv_wakeup = 0;
2735
2736 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2737
2738 while (mem) {
2739 int color;
2740
2741 nxt = (vm_page_t)(mem->pageq.next);
2742
2743 assert(!mem->free);
2744 assert(mem->busy);
2745 mem->free = TRUE;
2746
2747 color = mem->phys_page & vm_color_mask;
2748 queue_enter_first(&vm_page_queue_free[color],
2749 mem,
2750 vm_page_t,
2751 pageq);
2752 mem = nxt;
2753 }
2754 vm_page_free_count += pg_count;
2755 avail_free_count = vm_page_free_count;
2756
2757 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2758
2759 if (avail_free_count < vm_page_free_wanted_privileged) {
2760 need_priv_wakeup = avail_free_count;
2761 vm_page_free_wanted_privileged -= avail_free_count;
2762 avail_free_count = 0;
2763 } else {
2764 need_priv_wakeup = vm_page_free_wanted_privileged;
2765 avail_free_count -= vm_page_free_wanted_privileged;
2766 vm_page_free_wanted_privileged = 0;
2767 }
2768 }
2769 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2770 unsigned int available_pages;
2771
2772 available_pages = avail_free_count - vm_page_free_reserved;
2773
2774 if (available_pages >= vm_page_free_wanted) {
2775 need_wakeup = vm_page_free_wanted;
2776 vm_page_free_wanted = 0;
2777 } else {
2778 need_wakeup = available_pages;
2779 vm_page_free_wanted -= available_pages;
2780 }
2781 }
2782 lck_mtx_unlock(&vm_page_queue_free_lock);
2783
2784 if (need_priv_wakeup != 0) {
2785 /*
2786 * There shouldn't be that many VM-privileged threads,
2787 * so let's wake them all up, even if we don't quite
2788 * have enough pages to satisfy them all.
2789 */
2790 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2791 }
2792 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2793 /*
2794 * We don't expect to have any more waiters
2795 * after this, so let's wake them all up at
2796 * once.
2797 */
2798 thread_wakeup((event_t) &vm_page_free_count);
2799 } else for (; need_wakeup != 0; need_wakeup--) {
2800 /*
2801 * Wake up one waiter per page we just released.
2802 */
2803 thread_wakeup_one((event_t) &vm_page_free_count);
2804 }
2805
2806 VM_CHECK_MEMORYSTATUS;
2807 }
2808 }
2809 }
2810
2811
2812 /*
2813 * vm_page_wire:
2814 *
2815 * Mark this page as wired down by yet
2816 * another map, removing it from paging queues
2817 * as necessary.
2818 *
2819 * The page's object and the page queues must be locked.
2820 */
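/*
 * NOTE: only the 0 -> 1 wire_count transition below removes the page
 * from the paging queues and updates the wired / purgeable / ledger
 * accounting; additional wirings of an already-wired page just bump
 * wire_count.
 */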
2821 void
2822 vm_page_wire(
2823 register vm_page_t mem)
2824 {
2825
2826 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2827
2828 VM_PAGE_CHECK(mem);
2829 if (mem->object) {
2830 vm_object_lock_assert_exclusive(mem->object);
2831 } else {
2832 /*
2833 * In theory, the page should be in an object before it
2834 * gets wired, since we need to hold the object lock
2835 * to update some fields in the page structure.
2836 * However, some code (i386 pmap, for example) might want
2837 * to wire a page before it gets inserted into an object.
2838 * That's somewhat OK, as long as nobody else can get to
2839 * that page and update it at the same time.
2840 */
2841 }
2842 #if DEBUG
2843 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2844 #endif
2845 if ( !VM_PAGE_WIRED(mem)) {
2846
2847 if (mem->pageout_queue) {
2848 mem->pageout = FALSE;
2849 vm_pageout_throttle_up(mem);
2850 }
2851 VM_PAGE_QUEUES_REMOVE(mem);
2852
2853 if (mem->object) {
2854 mem->object->wired_page_count++;
2855 assert(mem->object->resident_page_count >=
2856 mem->object->wired_page_count);
2857 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2858 assert(vm_page_purgeable_count > 0);
2859 OSAddAtomic(-1, &vm_page_purgeable_count);
2860 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2861 }
2862 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2863 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2864 mem->object->vo_purgeable_owner != TASK_NULL) {
2865 task_t owner;
2866
2867 owner = mem->object->vo_purgeable_owner;
2868 /* less volatile bytes */
2869 ledger_debit(owner->ledger,
2870 task_ledgers.purgeable_volatile,
2871 PAGE_SIZE);
2872 /* more not-quite-volatile bytes */
2873 ledger_credit(owner->ledger,
2874 task_ledgers.purgeable_nonvolatile,
2875 PAGE_SIZE);
2876 /* more footprint */
2877 ledger_credit(owner->ledger,
2878 task_ledgers.phys_footprint,
2879 PAGE_SIZE);
2880 }
2881 if (mem->object->all_reusable) {
2882 /*
2883 * Wired pages are not counted as "re-usable"
2884 * in "all_reusable" VM objects, so nothing
2885 * to do here.
2886 */
2887 } else if (mem->reusable) {
2888 /*
2889 * This page is not "re-usable" when it's
2890 * wired, so adjust its state and the
2891 * accounting.
2892 */
2893 vm_object_reuse_pages(mem->object,
2894 mem->offset,
2895 mem->offset+PAGE_SIZE_64,
2896 FALSE);
2897 }
2898 }
2899 assert(!mem->reusable);
2900
2901 if (!mem->private && !mem->fictitious && !mem->gobbled)
2902 vm_page_wire_count++;
2903 if (mem->gobbled)
2904 vm_page_gobble_count--;
2905 mem->gobbled = FALSE;
2906
2907 VM_CHECK_MEMORYSTATUS;
2908
2909 /*
2910 * ENCRYPTED SWAP:
2911 * The page could be encrypted, but
2912 * we don't have to decrypt it here
2913 * because we don't guarantee that the
2914 * data is actually valid at this point.
2915 * The page will get decrypted in
2916 * vm_fault_wire() if needed.
2917 */
2918 }
2919 assert(!mem->gobbled);
2920 mem->wire_count++;
2921 VM_PAGE_CHECK(mem);
2922 }
2923
2924 /*
2925 * vm_page_gobble:
2926 *
2927 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2928 *
2929 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2930 */
2931 void
2932 vm_page_gobble(
2933 register vm_page_t mem)
2934 {
2935 vm_page_lockspin_queues();
2936 VM_PAGE_CHECK(mem);
2937
2938 assert(!mem->gobbled);
2939 assert( !VM_PAGE_WIRED(mem));
2940
2941 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2942 if (!mem->private && !mem->fictitious)
2943 vm_page_wire_count++;
2944 }
2945 vm_page_gobble_count++;
2946 mem->gobbled = TRUE;
2947 vm_page_unlock_queues();
2948 }
2949
2950 /*
2951 * vm_page_unwire:
2952 *
2953 * Release one wiring of this page, potentially
2954 * enabling it to be paged again.
2955 *
2956 * The page's object and the page queues must be locked.
2957 */
2958 void
2959 vm_page_unwire(
2960 vm_page_t mem,
2961 boolean_t queueit)
2962 {
2963
2964 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2965
2966 VM_PAGE_CHECK(mem);
2967 assert(VM_PAGE_WIRED(mem));
2968 assert(mem->object != VM_OBJECT_NULL);
2969 #if DEBUG
2970 vm_object_lock_assert_exclusive(mem->object);
2971 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2972 #endif
2973 if (--mem->wire_count == 0) {
2974 assert(!mem->private && !mem->fictitious);
2975 vm_page_wire_count--;
2976 assert(mem->object->wired_page_count > 0);
2977 mem->object->wired_page_count--;
2978 assert(mem->object->resident_page_count >=
2979 mem->object->wired_page_count);
2980 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2981 OSAddAtomic(+1, &vm_page_purgeable_count);
2982 assert(vm_page_purgeable_wired_count > 0);
2983 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2984 }
2985 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2986 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2987 mem->object->vo_purgeable_owner != TASK_NULL) {
2988 task_t owner;
2989
2990 owner = mem->object->vo_purgeable_owner;
2991 /* more volatile bytes */
2992 ledger_credit(owner->ledger,
2993 task_ledgers.purgeable_volatile,
2994 PAGE_SIZE);
2995 /* less not-quite-volatile bytes */
2996 ledger_debit(owner->ledger,
2997 task_ledgers.purgeable_nonvolatile,
2998 PAGE_SIZE);
2999 /* less footprint */
3000 ledger_debit(owner->ledger,
3001 task_ledgers.phys_footprint,
3002 PAGE_SIZE);
3003 }
3004 assert(mem->object != kernel_object);
3005 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
3006
3007 if (queueit == TRUE) {
3008 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
3009 vm_page_deactivate(mem);
3010 } else {
3011 vm_page_activate(mem);
3012 }
3013 }
3014
3015 VM_CHECK_MEMORYSTATUS;
3016
3017 }
3018 VM_PAGE_CHECK(mem);
3019 }
3020
3021 /*
3022 * vm_page_deactivate:
3023 *
3024 * Returns the given page to the inactive list,
3025 * indicating that no physical maps have access
3026 * to this page. [Used by the physical mapping system.]
3027 *
3028 * The page queues must be locked.
3029 */
3030 void
3031 vm_page_deactivate(
3032 vm_page_t m)
3033 {
3034 vm_page_deactivate_internal(m, TRUE);
3035 }
3036
3037
3038 void
3039 vm_page_deactivate_internal(
3040 vm_page_t m,
3041 boolean_t clear_hw_reference)
3042 {
3043
3044 VM_PAGE_CHECK(m);
3045 assert(m->object != kernel_object);
3046 assert(m->phys_page != vm_page_guard_addr);
3047
3048 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
3049 #if DEBUG
3050 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3051 #endif
3052 /*
3053 * This page is no longer very interesting. If it was
3054 * interesting (active or inactive/referenced), then we
3055 * clear the reference bit and (re)enter it in the
3056 * inactive queue. Note wired pages should not have
3057 * their reference bit cleared.
3058 */
3059 assert ( !(m->absent && !m->unusual));
3060
3061 if (m->gobbled) { /* can this happen? */
3062 assert( !VM_PAGE_WIRED(m));
3063
3064 if (!m->private && !m->fictitious)
3065 vm_page_wire_count--;
3066 vm_page_gobble_count--;
3067 m->gobbled = FALSE;
3068 }
3069 /*
3070 * if this page is currently on the pageout queue, we can't do the
3071 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3072 * and we can't remove it manually since we would need the object lock
3073 * (which is not required here) to decrement the activity_in_progress
3074 * reference which is held on the object while the page is in the pageout queue...
3075 * just let the normal laundry processing proceed
3076 */
3077 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
3078 return;
3079
3080 if (!m->absent && clear_hw_reference == TRUE)
3081 pmap_clear_reference(m->phys_page);
3082
3083 m->reference = FALSE;
3084 m->no_cache = FALSE;
3085
3086 if (!m->inactive) {
3087 VM_PAGE_QUEUES_REMOVE(m);
3088
3089 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3090 m->dirty && m->object->internal &&
3091 (m->object->purgable == VM_PURGABLE_DENY ||
3092 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3093 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3094 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3095 m->throttled = TRUE;
3096 vm_page_throttled_count++;
3097 } else {
3098 if (m->object->named && m->object->ref_count == 1) {
3099 vm_page_speculate(m, FALSE);
3100 #if DEVELOPMENT || DEBUG
3101 vm_page_speculative_recreated++;
3102 #endif
3103 } else {
3104 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3105 }
3106 }
3107 }
3108 }
3109
3110 /*
3111 * vm_page_enqueue_cleaned
3112 *
3113 * Put the page on the cleaned queue, mark it cleaned, etc.
3114 * Being on the cleaned queue (and having m->clean_queue set)
3115 * does ** NOT ** guarantee that the page is clean!
3116 *
3117 * Call with the queues lock held.
3118 */
3119
3120 void vm_page_enqueue_cleaned(vm_page_t m)
3121 {
3122 assert(m->phys_page != vm_page_guard_addr);
3123 #if DEBUG
3124 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3125 #endif
3126 assert( !(m->absent && !m->unusual));
3127
3128 if (m->gobbled) {
3129 assert( !VM_PAGE_WIRED(m));
3130 if (!m->private && !m->fictitious)
3131 vm_page_wire_count--;
3132 vm_page_gobble_count--;
3133 m->gobbled = FALSE;
3134 }
3135 /*
3136 * if this page is currently on the pageout queue, we can't do the
3137 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3138 * and we can't remove it manually since we would need the object lock
3139 * (which is not required here) to decrement the activity_in_progress
3140 * reference which is held on the object while the page is in the pageout queue...
3141 * just let the normal laundry processing proceed
3142 */
3143 if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
3144 return;
3145
3146 VM_PAGE_QUEUES_REMOVE(m);
3147
3148 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
3149 m->clean_queue = TRUE;
3150 vm_page_cleaned_count++;
3151
3152 m->inactive = TRUE;
3153 vm_page_inactive_count++;
3154 if (m->object->internal) {
3155 vm_page_pageable_internal_count++;
3156 } else {
3157 vm_page_pageable_external_count++;
3158 }
3159
3160 vm_pageout_enqueued_cleaned++;
3161 }
3162
3163 /*
3164 * vm_page_activate:
3165 *
3166 * Put the specified page on the active list (if appropriate).
3167 *
3168 * The page queues must be locked.
3169 */
3170
3171 void
3172 vm_page_activate(
3173 register vm_page_t m)
3174 {
3175 VM_PAGE_CHECK(m);
3176 #ifdef FIXME_4778297
3177 assert(m->object != kernel_object);
3178 #endif
3179 assert(m->phys_page != vm_page_guard_addr);
3180 #if DEBUG
3181 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3182 #endif
3183 assert( !(m->absent && !m->unusual));
3184
3185 if (m->gobbled) {
3186 assert( !VM_PAGE_WIRED(m));
3187 if (!m->private && !m->fictitious)
3188 vm_page_wire_count--;
3189 vm_page_gobble_count--;
3190 m->gobbled = FALSE;
3191 }
3192 /*
3193 * if this page is currently on the pageout queue, we can't do the
3194 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3195 * and we can't remove it manually since we would need the object lock
3196 * (which is not required here) to decrement the activity_in_progress
3197 * reference which is held on the object while the page is in the pageout queue...
3198 * just let the normal laundry processing proceed
3199 */
3200 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3201 return;
3202
3203 #if DEBUG
3204 if (m->active)
3205 panic("vm_page_activate: already active");
3206 #endif
3207
3208 if (m->speculative) {
3209 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
3210 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
3211 }
3212
3213 VM_PAGE_QUEUES_REMOVE(m);
3214
3215 if ( !VM_PAGE_WIRED(m)) {
3216
3217 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3218 m->dirty && m->object->internal &&
3219 (m->object->purgable == VM_PURGABLE_DENY ||
3220 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3221 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3222 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3223 m->throttled = TRUE;
3224 vm_page_throttled_count++;
3225 } else {
3226 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3227 m->active = TRUE;
3228 vm_page_active_count++;
3229 if (m->object->internal) {
3230 vm_page_pageable_internal_count++;
3231 } else {
3232 vm_page_pageable_external_count++;
3233 }
3234 }
3235 m->reference = TRUE;
3236 m->no_cache = FALSE;
3237 }
3238 VM_PAGE_CHECK(m);
3239 }
3240
3241
3242 /*
3243 * vm_page_speculate:
3244 *
3245 * Put the specified page on the speculative list (if appropriate).
3246 *
3247 * The page queues must be locked.
3248 */
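/*
 * NOTE: speculative pages are binned by age.  A new page goes into
 * the bucket selected by speculative_age_index; each bucket is given
 * a deadline of vm_page_speculative_q_age_ms, and once that deadline
 * passes the bucket's pages are shifted onto the "aged" queue (see
 * vm_page_speculate_ageit()) that pageout_scan steals from.
 */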
3249 void
3250 vm_page_speculate(
3251 vm_page_t m,
3252 boolean_t new)
3253 {
3254 struct vm_speculative_age_q *aq;
3255
3256 VM_PAGE_CHECK(m);
3257 assert(m->object != kernel_object);
3258 assert(m->phys_page != vm_page_guard_addr);
3259 #if DEBUG
3260 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3261 #endif
3262 assert( !(m->absent && !m->unusual));
3263
3264 /*
3265 * if this page is currently on the pageout queue, we can't do the
3266 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3267 * and we can't remove it manually since we would need the object lock
3268 * (which is not required here) to decrement the activity_in_progress
3269 * reference which is held on the object while the page is in the pageout queue...
3270 * just let the normal laundry processing proceed
3271 */
3272 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3273 return;
3274
3275 VM_PAGE_QUEUES_REMOVE(m);
3276
3277 if ( !VM_PAGE_WIRED(m)) {
3278 mach_timespec_t ts;
3279 clock_sec_t sec;
3280 clock_nsec_t nsec;
3281
3282 clock_get_system_nanotime(&sec, &nsec);
3283 ts.tv_sec = (unsigned int) sec;
3284 ts.tv_nsec = nsec;
3285
3286 if (vm_page_speculative_count == 0) {
3287
3288 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3289 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3290
3291 aq = &vm_page_queue_speculative[speculative_age_index];
3292
3293 /*
3294 * set the timer to begin a new group
3295 */
3296 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3297 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3298
3299 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3300 } else {
3301 aq = &vm_page_queue_speculative[speculative_age_index];
3302
3303 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3304
3305 speculative_age_index++;
3306
3307 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3308 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3309 if (speculative_age_index == speculative_steal_index) {
3310 speculative_steal_index = speculative_age_index + 1;
3311
3312 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3313 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3314 }
3315 aq = &vm_page_queue_speculative[speculative_age_index];
3316
3317 if (!queue_empty(&aq->age_q))
3318 vm_page_speculate_ageit(aq);
3319
3320 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3321 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3322
3323 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3324 }
3325 }
3326 enqueue_tail(&aq->age_q, &m->pageq);
3327 m->speculative = TRUE;
3328 vm_page_speculative_count++;
3329 if (m->object->internal) {
3330 vm_page_pageable_internal_count++;
3331 } else {
3332 vm_page_pageable_external_count++;
3333 }
3334
3335 if (new == TRUE) {
3336 vm_object_lock_assert_exclusive(m->object);
3337
3338 m->object->pages_created++;
3339 #if DEVELOPMENT || DEBUG
3340 vm_page_speculative_created++;
3341 #endif
3342 }
3343 }
3344 VM_PAGE_CHECK(m);
3345 }
3346
3347
3348 /*
3349 * move pages from the specified aging bin to
3350 * the speculative bin that pageout_scan claims from
3351 *
3352 * The page queues must be locked.
3353 */
3354 void
3355 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3356 {
3357 struct vm_speculative_age_q *sq;
3358 vm_page_t t;
3359
3360 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3361
3362 if (queue_empty(&sq->age_q)) {
3363 sq->age_q.next = aq->age_q.next;
3364 sq->age_q.prev = aq->age_q.prev;
3365
3366 t = (vm_page_t)sq->age_q.next;
3367 t->pageq.prev = &sq->age_q;
3368
3369 t = (vm_page_t)sq->age_q.prev;
3370 t->pageq.next = &sq->age_q;
3371 } else {
3372 t = (vm_page_t)sq->age_q.prev;
3373 t->pageq.next = aq->age_q.next;
3374
3375 t = (vm_page_t)aq->age_q.next;
3376 t->pageq.prev = sq->age_q.prev;
3377
3378 t = (vm_page_t)aq->age_q.prev;
3379 t->pageq.next = &sq->age_q;
3380
3381 sq->age_q.prev = aq->age_q.prev;
3382 }
3383 queue_init(&aq->age_q);
3384 }
3385
3386
3387 void
3388 vm_page_lru(
3389 vm_page_t m)
3390 {
3391 VM_PAGE_CHECK(m);
3392 assert(m->object != kernel_object);
3393 assert(m->phys_page != vm_page_guard_addr);
3394
3395 #if DEBUG
3396 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3397 #endif
3398 /*
3399 * if this page is currently on the pageout queue, we can't do the
3400 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3401 * and we can't remove it manually since we would need the object lock
3402 * (which is not required here) to decrement the activity_in_progress
3403 * reference which is held on the object while the page is in the pageout queue...
3404 * just let the normal laundry processing proceed
3405 */
3406 if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
3407 return;
3408
3409 m->no_cache = FALSE;
3410
3411 VM_PAGE_QUEUES_REMOVE(m);
3412
3413 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3414 }
3415
3416
3417 void
3418 vm_page_reactivate_all_throttled(void)
3419 {
3420 vm_page_t first_throttled, last_throttled;
3421 vm_page_t first_active;
3422 vm_page_t m;
3423 int extra_active_count;
3424 int extra_internal_count, extra_external_count;
3425
3426 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3427 return;
3428
3429 extra_active_count = 0;
3430 extra_internal_count = 0;
3431 extra_external_count = 0;
3432 vm_page_lock_queues();
3433 if (! queue_empty(&vm_page_queue_throttled)) {
3434 /*
3435 * Switch "throttled" pages to "active".
3436 */
3437 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3438 VM_PAGE_CHECK(m);
3439 assert(m->throttled);
3440 assert(!m->active);
3441 assert(!m->inactive);
3442 assert(!m->speculative);
3443 assert(!VM_PAGE_WIRED(m));
3444
3445 extra_active_count++;
3446 if (m->object->internal) {
3447 extra_internal_count++;
3448 } else {
3449 extra_external_count++;
3450 }
3451
3452 m->throttled = FALSE;
3453 m->active = TRUE;
3454 VM_PAGE_CHECK(m);
3455 }
3456
3457 /*
3458 * Transfer the entire throttled queue to the regular LRU page queues.
3459 * We insert it at the head of the active queue, so that these pages
3460 * get re-evaluated by the LRU algorithm first, since they've been
3461 * completely out of it until now.
3462 */
3463 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3464 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3465 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3466 if (queue_empty(&vm_page_queue_active)) {
3467 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3468 } else {
3469 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3470 }
3471 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3472 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3473 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3474
3475 #if DEBUG
3476 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3477 #endif
3478 queue_init(&vm_page_queue_throttled);
3479 /*
3480 * Adjust the global page counts.
3481 */
3482 vm_page_active_count += extra_active_count;
3483 vm_page_pageable_internal_count += extra_internal_count;
3484 vm_page_pageable_external_count += extra_external_count;
3485 vm_page_throttled_count = 0;
3486 }
3487 assert(vm_page_throttled_count == 0);
3488 assert(queue_empty(&vm_page_queue_throttled));
3489 vm_page_unlock_queues();
3490 }
3491
3492
3493 /*
3494 * move pages from the indicated local queue to the global active queue
3495 * it's ok to fail if we're below the hard limit and force == FALSE
3496 * the nolocks == TRUE case is to allow this function to be run on
3497 * the hibernate path
3498 */
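/*
 * NOTE: vm_page_local_q[lid] is a per-cpu staging queue of pages
 * destined for the active list; this routine splices that whole
 * queue onto the head of the global active queue.  When force ==
 * FALSE and the local queue is still under its hard limit we only
 * try-lock the page queues and bail if they are busy.
 */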
3499
3500 void
3501 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3502 {
3503 struct vpl *lq;
3504 vm_page_t first_local, last_local;
3505 vm_page_t first_active;
3506 vm_page_t m;
3507 uint32_t count = 0;
3508
3509 if (vm_page_local_q == NULL)
3510 return;
3511
3512 lq = &vm_page_local_q[lid].vpl_un.vpl;
3513
3514 if (nolocks == FALSE) {
3515 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3516 if ( !vm_page_trylockspin_queues())
3517 return;
3518 } else
3519 vm_page_lockspin_queues();
3520
3521 VPL_LOCK(&lq->vpl_lock);
3522 }
3523 if (lq->vpl_count) {
3524 /*
3525 * Switch "local" pages to "active".
3526 */
3527 assert(!queue_empty(&lq->vpl_queue));
3528
3529 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3530 VM_PAGE_CHECK(m);
3531 assert(m->local);
3532 assert(!m->active);
3533 assert(!m->inactive);
3534 assert(!m->speculative);
3535 assert(!VM_PAGE_WIRED(m));
3536 assert(!m->throttled);
3537 assert(!m->fictitious);
3538
3539 if (m->local_id != lid)
3540 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3541
3542 m->local_id = 0;
3543 m->local = FALSE;
3544 m->active = TRUE;
3545 VM_PAGE_CHECK(m);
3546
3547 count++;
3548 }
3549 if (count != lq->vpl_count)
3550 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3551
3552 /*
3553 * Transfer the entire local queue to the regular LRU page queues.
3554 */
3555 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3556 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3557 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3558
3559 if (queue_empty(&vm_page_queue_active)) {
3560 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3561 } else {
3562 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3563 }
3564 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3565 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3566 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3567
3568 queue_init(&lq->vpl_queue);
3569 /*
3570 * Adjust the global page counts.
3571 */
3572 vm_page_active_count += lq->vpl_count;
3573 vm_page_pageable_internal_count += lq->vpl_internal_count;
3574 vm_page_pageable_external_count += lq->vpl_external_count;
3575 lq->vpl_count = 0;
3576 lq->vpl_internal_count = 0;
3577 lq->vpl_external_count = 0;
3578 }
3579 assert(queue_empty(&lq->vpl_queue));
3580
3581 if (nolocks == FALSE) {
3582 VPL_UNLOCK(&lq->vpl_lock);
3583 vm_page_unlock_queues();
3584 }
3585 }
3586
3587 /*
3588 * vm_page_part_zero_fill:
3589 *
3590 * Zero-fill a part of the page.
3591 */
3592 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3593 void
3594 vm_page_part_zero_fill(
3595 vm_page_t m,
3596 vm_offset_t m_pa,
3597 vm_size_t len)
3598 {
3599
3600 #if 0
3601 /*
3602 * we don't hold the page queue lock
3603 * so this check isn't safe to make
3604 */
3605 VM_PAGE_CHECK(m);
3606 #endif
3607
3608 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3609 pmap_zero_part_page(m->phys_page, m_pa, len);
3610 #else
3611 vm_page_t tmp;
3612 while (1) {
3613 tmp = vm_page_grab();
3614 if (tmp == VM_PAGE_NULL) {
3615 vm_page_wait(THREAD_UNINT);
3616 continue;
3617 }
3618 break;
3619 }
3620 vm_page_zero_fill(tmp);
3621 if(m_pa != 0) {
3622 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3623 }
3624 if((m_pa + len) < PAGE_SIZE) {
3625 vm_page_part_copy(m, m_pa + len, tmp,
3626 m_pa + len, PAGE_SIZE - (m_pa + len));
3627 }
3628 vm_page_copy(tmp,m);
3629 VM_PAGE_FREE(tmp);
3630 #endif
3631
3632 }
3633
3634 /*
3635 * vm_page_zero_fill:
3636 *
3637 * Zero-fill the specified page.
3638 */
3639 void
3640 vm_page_zero_fill(
3641 vm_page_t m)
3642 {
3643 XPR(XPR_VM_PAGE,
3644 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3645 m->object, m->offset, m, 0,0);
3646 #if 0
3647 /*
3648 * we don't hold the page queue lock
3649 * so this check isn't safe to make
3650 */
3651 VM_PAGE_CHECK(m);
3652 #endif
3653
3654 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3655 pmap_zero_page(m->phys_page);
3656 }
3657
3658 /*
3659 * vm_page_part_copy:
3660 *
3661 * copy part of one page to another
3662 */
3663
3664 void
3665 vm_page_part_copy(
3666 vm_page_t src_m,
3667 vm_offset_t src_pa,
3668 vm_page_t dst_m,
3669 vm_offset_t dst_pa,
3670 vm_size_t len)
3671 {
3672 #if 0
3673 /*
3674 * we don't hold the page queue lock
3675 * so this check isn't safe to make
3676 */
3677 VM_PAGE_CHECK(src_m);
3678 VM_PAGE_CHECK(dst_m);
3679 #endif
3680 pmap_copy_part_page(src_m->phys_page, src_pa,
3681 dst_m->phys_page, dst_pa, len);
3682 }
3683
3684 /*
3685 * vm_page_copy:
3686 *
3687 * Copy one page to another
3688 *
3689 * ENCRYPTED SWAP:
3690 * The source page should not be encrypted. The caller should
3691 * make sure the page is decrypted first, if necessary.
3692 */
3693
3694 int vm_page_copy_cs_validations = 0;
3695 int vm_page_copy_cs_tainted = 0;
3696
3697 void
3698 vm_page_copy(
3699 vm_page_t src_m,
3700 vm_page_t dest_m)
3701 {
3702 XPR(XPR_VM_PAGE,
3703 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3704 src_m->object, src_m->offset,
3705 dest_m->object, dest_m->offset,
3706 0);
3707 #if 0
3708 /*
3709 * we don't hold the page queue lock
3710 * so this check isn't safe to make
3711 */
3712 VM_PAGE_CHECK(src_m);
3713 VM_PAGE_CHECK(dest_m);
3714 #endif
3715 vm_object_lock_assert_held(src_m->object);
3716
3717 /*
3718 * ENCRYPTED SWAP:
3719 * The source page should not be encrypted at this point.
3720 * The destination page will therefore not contain encrypted
3721 * data after the copy.
3722 */
3723 if (src_m->encrypted) {
3724 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3725 }
3726 dest_m->encrypted = FALSE;
3727
3728 if (src_m->object != VM_OBJECT_NULL &&
3729 src_m->object->code_signed) {
3730 /*
3731 * We're copying a page from a code-signed object.
3732 * Whoever ends up mapping the copy page might care about
3733 * the original page's integrity, so let's validate the
3734 * source page now.
3735 */
3736 vm_page_copy_cs_validations++;
3737 vm_page_validate_cs(src_m);
3738 }
3739
3740 if (vm_page_is_slideable(src_m)) {
3741 boolean_t was_busy = src_m->busy;
3742 src_m->busy = TRUE;
3743 (void) vm_page_slide(src_m, 0);
3744 assert(src_m->busy);
3745 if (!was_busy) {
3746 PAGE_WAKEUP_DONE(src_m);
3747 }
3748 }
3749
3750 /*
3751 * Propagate the cs_tainted bit to the copy page. Do not propagate
3752 * the cs_validated bit.
3753 */
3754 dest_m->cs_tainted = src_m->cs_tainted;
3755 if (dest_m->cs_tainted) {
3756 vm_page_copy_cs_tainted++;
3757 }
3758 dest_m->slid = src_m->slid;
3759 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3760 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3761 }
3762
3763 #if MACH_ASSERT
3764 static void
3765 _vm_page_print(
3766 vm_page_t p)
3767 {
3768 printf("vm_page %p: \n", p);
3769 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3770 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3771 printf(" next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
3772 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3773 printf(" wire_count=%u\n", p->wire_count);
3774
3775 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3776 (p->local ? "" : "!"),
3777 (p->inactive ? "" : "!"),
3778 (p->active ? "" : "!"),
3779 (p->pageout_queue ? "" : "!"),
3780 (p->speculative ? "" : "!"),
3781 (p->laundry ? "" : "!"));
3782 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3783 (p->free ? "" : "!"),
3784 (p->reference ? "" : "!"),
3785 (p->gobbled ? "" : "!"),
3786 (p->private ? "" : "!"),
3787 (p->throttled ? "" : "!"));
3788 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3789 (p->busy ? "" : "!"),
3790 (p->wanted ? "" : "!"),
3791 (p->tabled ? "" : "!"),
3792 (p->fictitious ? "" : "!"),
3793 (p->pmapped ? "" : "!"),
3794 (p->wpmapped ? "" : "!"));
3795 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3796 (p->pageout ? "" : "!"),
3797 (p->absent ? "" : "!"),
3798 (p->error ? "" : "!"),
3799 (p->dirty ? "" : "!"),
3800 (p->cleaning ? "" : "!"),
3801 (p->precious ? "" : "!"),
3802 (p->clustered ? "" : "!"));
3803 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3804 (p->overwriting ? "" : "!"),
3805 (p->restart ? "" : "!"),
3806 (p->unusual ? "" : "!"),
3807 (p->encrypted ? "" : "!"),
3808 (p->encrypted_cleaning ? "" : "!"));
3809 printf(" %scs_validated, %scs_tainted, %scs_nx, %sno_cache\n",
3810 (p->cs_validated ? "" : "!"),
3811 (p->cs_tainted ? "" : "!"),
3812 (p->cs_nx ? "" : "!"),
3813 (p->no_cache ? "" : "!"));
3814
3815 printf("phys_page=0x%x\n", p->phys_page);
3816 }
3817
3818 /*
3819 * Check that the list of pages is ordered by
3820 * ascending physical address and has no holes.
3821 */
3822 static int
3823 vm_page_verify_contiguous(
3824 vm_page_t pages,
3825 unsigned int npages)
3826 {
3827 register vm_page_t m;
3828 unsigned int page_count;
3829 vm_offset_t prev_addr;
3830
3831 prev_addr = pages->phys_page;
3832 page_count = 1;
3833 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3834 if (m->phys_page != prev_addr + 1) {
3835 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3836 m, (long)prev_addr, m->phys_page);
3837 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3838 panic("vm_page_verify_contiguous: not contiguous!");
3839 }
3840 prev_addr = m->phys_page;
3841 ++page_count;
3842 }
3843 if (page_count != npages) {
3844 printf("pages %p actual count 0x%x but requested 0x%x\n",
3845 pages, page_count, npages);
3846 panic("vm_page_verify_contiguous: count error");
3847 }
3848 return 1;
3849 }
3850
3851
3852 /*
3853 * Check the free lists for proper length etc.
3854 */
3855 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
3856 static unsigned int
3857 vm_page_verify_free_list(
3858 queue_head_t *vm_page_queue,
3859 unsigned int color,
3860 vm_page_t look_for_page,
3861 boolean_t expect_page)
3862 {
3863 unsigned int npages;
3864 vm_page_t m;
3865 vm_page_t prev_m;
3866 boolean_t found_page;
3867
3868 if (! vm_page_verify_this_free_list_enabled)
3869 return 0;
3870
3871 found_page = FALSE;
3872 npages = 0;
3873 prev_m = (vm_page_t) vm_page_queue;
3874 queue_iterate(vm_page_queue,
3875 m,
3876 vm_page_t,
3877 pageq) {
3878
3879 if (m == look_for_page) {
3880 found_page = TRUE;
3881 }
3882 if ((vm_page_t) m->pageq.prev != prev_m)
3883 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3884 color, npages, m, m->pageq.prev, prev_m);
3885 if ( ! m->busy )
3886 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3887 color, npages, m);
3888 if (color != (unsigned int) -1) {
3889 if ((m->phys_page & vm_color_mask) != color)
3890 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3891 color, npages, m, m->phys_page & vm_color_mask, color);
3892 if ( ! m->free )
3893 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3894 color, npages, m);
3895 }
3896 ++npages;
3897 prev_m = m;
3898 }
3899 if (look_for_page != VM_PAGE_NULL) {
3900 unsigned int other_color;
3901
3902 if (expect_page && !found_page) {
3903 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3904 color, npages, look_for_page, look_for_page->phys_page);
3905 _vm_page_print(look_for_page);
3906 for (other_color = 0;
3907 other_color < vm_colors;
3908 other_color++) {
3909 if (other_color == color)
3910 continue;
3911 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3912 other_color, look_for_page, FALSE);
3913 }
3914 if (color == (unsigned int) -1) {
3915 vm_page_verify_free_list(&vm_lopage_queue_free,
3916 (unsigned int) -1, look_for_page, FALSE);
3917 }
3918 panic("vm_page_verify_free_list(color=%u)\n", color);
3919 }
3920 if (!expect_page && found_page) {
3921 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3922 color, npages, look_for_page, look_for_page->phys_page);
3923 }
3924 }
3925 return npages;
3926 }
3927
3928 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
3929 static void
3930 vm_page_verify_free_lists( void )
3931 {
3932 unsigned int color, npages, nlopages;
3933 boolean_t toggle = TRUE;
3934
3935 if (! vm_page_verify_all_free_lists_enabled)
3936 return;
3937
3938 npages = 0;
3939
3940 lck_mtx_lock(&vm_page_queue_free_lock);
3941
3942 if (vm_page_verify_this_free_list_enabled == TRUE) {
3943 /*
3944 * This variable has been set globally for extra checking of
3945 * each free list Q. Since we didn't set it, we don't own it
3946 * and we shouldn't toggle it.
3947 */
3948 toggle = FALSE;
3949 }
3950
3951 if (toggle == TRUE) {
3952 vm_page_verify_this_free_list_enabled = TRUE;
3953 }
3954
3955 for( color = 0; color < vm_colors; color++ ) {
3956 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3957 color, VM_PAGE_NULL, FALSE);
3958 }
3959 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3960 (unsigned int) -1,
3961 VM_PAGE_NULL, FALSE);
3962 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3963 panic("vm_page_verify_free_lists: "
3964 "npages %u free_count %d nlopages %u lo_free_count %u",
3965 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3966
3967 if (toggle == TRUE) {
3968 vm_page_verify_this_free_list_enabled = FALSE;
3969 }
3970
3971 lck_mtx_unlock(&vm_page_queue_free_lock);
3972 }
3973
3974 void
3975 vm_page_queues_assert(
3976 vm_page_t mem,
3977 int val)
3978 {
3979 #if DEBUG
3980 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3981 #endif
3982 if (mem->free + mem->active + mem->inactive + mem->speculative +
3983 mem->throttled + mem->pageout_queue > (val)) {
3984 _vm_page_print(mem);
3985 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3986 }
3987 if (VM_PAGE_WIRED(mem)) {
3988 assert(!mem->active);
3989 assert(!mem->inactive);
3990 assert(!mem->speculative);
3991 assert(!mem->throttled);
3992 assert(!mem->pageout_queue);
3993 }
3994 }
3995 #endif /* MACH_ASSERT */
3996
3997
3998 /*
3999 * CONTIGUOUS PAGE ALLOCATION
4000 *
4001 * Find a region large enough to contain at least n pages
4002 * of contiguous physical memory.
4003 *
4004 * This is done by traversing the vm_page_t array in a linear fashion
4005 * we assume that the vm_page_t array has the available physical pages in an
4006 * ordered, ascending list... this is currently true of all our implementations
4007 * and must remain so... there can be 'holes' in the array... we also can
4008 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
4009 * which used to happen via 'vm_page_convert'... that function was no longer
4010 * being called and was removed...
4011 *
4012 * The basic flow consists of stabilizing some of the interesting state of
4013 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
4014 * sweep at the beginning of the array looking for pages that meet our criteria
4015 * for a 'stealable' page... currently we are pretty conservative... if the page
4016 * meets this criteria and is physically contiguous to the previous page in the 'run'
4017 * we keep developing it. If we hit a page that doesn't fit, we reset our state
4018 * and start to develop a new run... if at this point we've already considered
4019 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
4020 * and mutex_pause (which will yield the processor), to keep the latency low w/r
4021 * to other threads trying to acquire free pages (or move pages from q to q),
4022 * and then continue from the spot we left off... we only make 1 pass through the
4023 * array. Once we have a 'run' that is long enough, we'll go into the loop
4024 * which steals the pages from the queues they're currently on... pages on the free
4025 * queue can be stolen directly... pages that are on any of the other queues
4026 * must be removed from the object they are tabled on... this requires taking the
4027 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
4028 * or if the state of the page behind the vm_object lock is no longer viable, we'll
4029 * dump the pages we've currently stolen back to the free list, and pick up our
4030 * scan from the point where we aborted the 'current' run.
4031 *
4032 *
4033 * Requirements:
4034 * - neither vm_page_queue nor vm_free_list lock can be held on entry
4035 *
4036 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
4037 *
4038 * Algorithm:
4039 */
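
/*
 * A condensed sketch of the scan described above, for orientation only;
 * the authoritative logic is in vm_page_find_contiguous() below.
 *
 *	lock vm_page_queues + vm_page_queue_free_lock
 *	for (page_idx = last stopping point; npages < contig_pages; page_idx++) {
 *		m = &vm_pages[page_idx];
 *		if (m is wired, laundry, busy or otherwise not 'stealable')
 *			RESET_STATE_OF_RUN();
 *		else if (m->phys_page == prevcontaddr + 1)
 *			npages++;			// run keeps growing
 *		else
 *			start a new run at page_idx;	// npages = 1
 *		if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1)
 *			drop both locks, mutex_pause(), retake locks, reset the run
 *	}
 *	if (npages == contig_pages)
 *		steal the run: free pages directly, the rest via their object lock
 *	else
 *		wrap around to index 0 once, or give up
 */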
4040
4041 #define MAX_CONSIDERED_BEFORE_YIELD 1000
4042
4043
4044 #define RESET_STATE_OF_RUN() \
4045 MACRO_BEGIN \
4046 prevcontaddr = -2; \
4047 start_pnum = -1; \
4048 free_considered = 0; \
4049 substitute_needed = 0; \
4050 npages = 0; \
4051 MACRO_END
4052
4053 /*
4054 * Can we steal in-use (i.e. not free) pages when searching for
4055 * physically-contiguous pages ?
4056 */
4057 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
4058
4059 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
4060 #if DEBUG
4061 int vm_page_find_contig_debug = 0;
4062 #endif
4063
4064 static vm_page_t
4065 vm_page_find_contiguous(
4066 unsigned int contig_pages,
4067 ppnum_t max_pnum,
4068 ppnum_t pnum_mask,
4069 boolean_t wire,
4070 int flags)
4071 {
4072 vm_page_t m = NULL;
4073 ppnum_t prevcontaddr;
4074 ppnum_t start_pnum;
4075 unsigned int npages, considered, scanned;
4076 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
4077 unsigned int idx_last_contig_page_found = 0;
4078 int free_considered, free_available;
4079 int substitute_needed;
4080 boolean_t wrapped;
4081 #if DEBUG
4082 clock_sec_t tv_start_sec, tv_end_sec;
4083 clock_usec_t tv_start_usec, tv_end_usec;
4084 #endif
4085 #if MACH_ASSERT
4086 int yielded = 0;
4087 int dumped_run = 0;
4088 int stolen_pages = 0;
4089 int compressed_pages = 0;
4090 #endif
4091
4092 if (contig_pages == 0)
4093 return VM_PAGE_NULL;
4094
4095 #if MACH_ASSERT
4096 vm_page_verify_free_lists();
4097 #endif
4098 #if DEBUG
4099 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
4100 #endif
4101 PAGE_REPLACEMENT_ALLOWED(TRUE);
4102
4103 vm_page_lock_queues();
4104 lck_mtx_lock(&vm_page_queue_free_lock);
4105
4106 RESET_STATE_OF_RUN();
4107
4108 scanned = 0;
4109 considered = 0;
4110 free_available = vm_page_free_count - vm_page_free_reserved;
4111
4112 wrapped = FALSE;
4113
4114 if(flags & KMA_LOMEM)
4115 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
4116 else
4117 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
4118
4119 orig_last_idx = idx_last_contig_page_found;
4120 last_idx = orig_last_idx;
4121
4122 for (page_idx = last_idx, start_idx = last_idx;
4123 npages < contig_pages && page_idx < vm_pages_count;
4124 page_idx++) {
4125 retry:
4126 if (wrapped &&
4127 npages == 0 &&
4128 page_idx >= orig_last_idx) {
4129 /*
4130 * We're back where we started and we haven't
4131 * found any suitable contiguous range. Let's
4132 * give up.
4133 */
4134 break;
4135 }
4136 scanned++;
4137 m = &vm_pages[page_idx];
4138
4139 assert(!m->fictitious);
4140 assert(!m->private);
4141
4142 if (max_pnum && m->phys_page > max_pnum) {
4143 /* no more low pages... */
4144 break;
4145 }
4146 if (!npages && ((m->phys_page & pnum_mask) != 0)) {
4147 /*
4148 * not aligned
4149 */
4150 RESET_STATE_OF_RUN();
4151
4152 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
4153 m->encrypted_cleaning ||
4154 m->pageout_queue || m->laundry || m->wanted ||
4155 m->cleaning || m->overwriting || m->pageout) {
4156 /*
4157 * page is in a transient state
4158 * or a state we don't want to deal
4159 * with, so don't consider it which
4160 * means starting a new run
4161 */
4162 RESET_STATE_OF_RUN();
4163
4164 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
4165 /*
4166 * page needs to be on one of our queues
4167 * or it needs to belong to the compressor pool
4168 * in order for it to be stable behind the
4169 * locks we hold at this point...
4170 * if not, don't consider it which
4171 * means starting a new run
4172 */
4173 RESET_STATE_OF_RUN();
4174
4175 } else if (!m->free && (!m->tabled || m->busy)) {
4176 /*
4177 * pages on the free list are always 'busy'
4178 * so we couldn't test for 'busy' in the check
4179 * for the transient states... pages that are
4180 * 'free' are never 'tabled', so we also couldn't
4181 * test for 'tabled'. So we check here to make
4182 * sure that a non-free page is not busy and is
4183 * tabled on an object...
4184 * if not, don't consider it which
4185 * means starting a new run
4186 */
4187 RESET_STATE_OF_RUN();
4188
4189 } else {
4190 if (m->phys_page != prevcontaddr + 1) {
4191 if ((m->phys_page & pnum_mask) != 0) {
4192 RESET_STATE_OF_RUN();
4193 goto did_consider;
4194 } else {
4195 npages = 1;
4196 start_idx = page_idx;
4197 start_pnum = m->phys_page;
4198 }
4199 } else {
4200 npages++;
4201 }
4202 prevcontaddr = m->phys_page;
4203
4204 VM_PAGE_CHECK(m);
4205 if (m->free) {
4206 free_considered++;
4207 } else {
4208 /*
4209 * This page is not free.
4210 * If we can't steal used pages,
4211 * we have to give up this run
4212 * and keep looking.
4213 * Otherwise, we might need to
4214 * move the contents of this page
4215 * into a substitute page.
4216 */
4217 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4218 if (m->pmapped || m->dirty || m->precious) {
4219 substitute_needed++;
4220 }
4221 #else
4222 RESET_STATE_OF_RUN();
4223 #endif
4224 }
4225
4226 if ((free_considered + substitute_needed) > free_available) {
4227 /*
4228 * if we let this run continue
4229 * we will end up dropping the vm_page_free_count
4230 * below the reserve limit... we need to abort
4231 * this run, but we can at least re-consider this
4232 * page... thus the jump back to 'retry'
4233 */
4234 RESET_STATE_OF_RUN();
4235
4236 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4237 considered++;
4238 goto retry;
4239 }
4240 /*
4241 * free_available == 0
4242 * so can't consider any free pages... if
4243 * we went to retry in this case, we'd
4244 * get stuck looking at the same page
4245 * w/o making any forward progress
4246 * we also want to take this path if we've already
4247 * reached our limit that controls the lock latency
4248 */
4249 }
4250 }
4251 did_consider:
4252 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
4253
4254 PAGE_REPLACEMENT_ALLOWED(FALSE);
4255
4256 lck_mtx_unlock(&vm_page_queue_free_lock);
4257 vm_page_unlock_queues();
4258
4259 mutex_pause(0);
4260
4261 PAGE_REPLACEMENT_ALLOWED(TRUE);
4262
4263 vm_page_lock_queues();
4264 lck_mtx_lock(&vm_page_queue_free_lock);
4265
4266 RESET_STATE_OF_RUN();
4267 /*
4268 * reset our free page limit since we
4269 * dropped the lock protecting the vm_page_free_queue
4270 */
4271 free_available = vm_page_free_count - vm_page_free_reserved;
4272 considered = 0;
4273 #if MACH_ASSERT
4274 yielded++;
4275 #endif
4276 goto retry;
4277 }
4278 considered++;
4279 }
4280 m = VM_PAGE_NULL;
4281
4282 if (npages != contig_pages) {
4283 if (!wrapped) {
4284 /*
4285 * We didn't find a contiguous range but we didn't
4286 * start from the very first page.
4287 * Start again from the very first page.
4288 */
4289 RESET_STATE_OF_RUN();
4290 if( flags & KMA_LOMEM)
4291 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4292 else
4293 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4294 last_idx = 0;
4295 page_idx = last_idx;
4296 wrapped = TRUE;
4297 goto retry;
4298 }
4299 lck_mtx_unlock(&vm_page_queue_free_lock);
4300 } else {
4301 vm_page_t m1;
4302 vm_page_t m2;
4303 unsigned int cur_idx;
4304 unsigned int tmp_start_idx;
4305 vm_object_t locked_object = VM_OBJECT_NULL;
4306 boolean_t abort_run = FALSE;
4307
4308 assert(page_idx - start_idx == contig_pages);
4309
4310 tmp_start_idx = start_idx;
4311
4312 /*
4313 * first pass through to pull the free pages
4314 * off of the free queue so that in case we
4315 * need substitute pages, we won't grab any
4316 * of the free pages in the run... we'll clear
4317 * the 'free' bit in the 2nd pass, and even in
4318 * an abort_run case, we'll collect all of the
4319 * free pages in this run and return them to the free list
4320 */
4321 while (start_idx < page_idx) {
4322
4323 m1 = &vm_pages[start_idx++];
4324
4325 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4326 assert(m1->free);
4327 #endif
4328
4329 if (m1->free) {
4330 unsigned int color;
4331
4332 color = m1->phys_page & vm_color_mask;
4333 #if MACH_ASSERT
4334 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
4335 #endif
4336 queue_remove(&vm_page_queue_free[color],
4337 m1,
4338 vm_page_t,
4339 pageq);
4340 m1->pageq.next = NULL;
4341 m1->pageq.prev = NULL;
4342 #if MACH_ASSERT
4343 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
4344 #endif
4345 /*
4346 * Clear the "free" bit so that this page
4347 * does not get considered for another
4348 * concurrent physically-contiguous allocation.
4349 */
4350 m1->free = FALSE;
4351 assert(m1->busy);
4352
4353 vm_page_free_count--;
4354 }
4355 }
4356 if( flags & KMA_LOMEM)
4357 vm_page_lomem_find_contiguous_last_idx = page_idx;
4358 else
4359 vm_page_find_contiguous_last_idx = page_idx;
4360
4361 /*
4362 * we can drop the free queue lock at this point since
4363 * we've pulled any 'free' candidates off of the list
4364 * we need it dropped so that we can do a vm_page_grab
4365 * when substituting for pmapped/dirty pages
4366 */
4367 lck_mtx_unlock(&vm_page_queue_free_lock);
4368
4369 start_idx = tmp_start_idx;
4370 cur_idx = page_idx - 1;
4371
4372 while (start_idx++ < page_idx) {
4373 /*
4374 * must go through the list from back to front
4375 * so that the page list is created in the
4376 * correct order - low -> high phys addresses
4377 */
4378 m1 = &vm_pages[cur_idx--];
4379
4380 assert(!m1->free);
4381
4382 if (m1->object == VM_OBJECT_NULL) {
4383 /*
4384 * page has already been removed from
4385 * the free list in the 1st pass
4386 */
4387 assert(m1->offset == (vm_object_offset_t) -1);
4388 assert(m1->busy);
4389 assert(!m1->wanted);
4390 assert(!m1->laundry);
4391 } else {
4392 vm_object_t object;
4393 int refmod;
4394 boolean_t disconnected, reusable;
4395
4396 if (abort_run == TRUE)
4397 continue;
4398
4399 object = m1->object;
4400
4401 if (object != locked_object) {
4402 if (locked_object) {
4403 vm_object_unlock(locked_object);
4404 locked_object = VM_OBJECT_NULL;
4405 }
4406 if (vm_object_lock_try(object))
4407 locked_object = object;
4408 }
4409 if (locked_object == VM_OBJECT_NULL ||
4410 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4411 m1->encrypted_cleaning ||
4412 m1->pageout_queue || m1->laundry || m1->wanted ||
4413 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
4414
4415 if (locked_object) {
4416 vm_object_unlock(locked_object);
4417 locked_object = VM_OBJECT_NULL;
4418 }
4419 tmp_start_idx = cur_idx;
4420 abort_run = TRUE;
4421 continue;
4422 }
4423
4424 disconnected = FALSE;
4425 reusable = FALSE;
4426
4427 if ((m1->reusable ||
4428 m1->object->all_reusable) &&
4429 m1->inactive &&
4430 !m1->dirty &&
4431 !m1->reference) {
4432 /* reusable page... */
4433 refmod = pmap_disconnect(m1->phys_page);
4434 disconnected = TRUE;
4435 if (refmod == 0) {
4436 /*
4437 * ... not reused: can steal
4438 * without relocating contents.
4439 */
4440 reusable = TRUE;
4441 }
4442 }
4443
4444 if ((m1->pmapped &&
4445 ! reusable) ||
4446 m1->dirty ||
4447 m1->precious) {
4448 vm_object_offset_t offset;
4449
4450 m2 = vm_page_grab();
4451
4452 if (m2 == VM_PAGE_NULL) {
4453 if (locked_object) {
4454 vm_object_unlock(locked_object);
4455 locked_object = VM_OBJECT_NULL;
4456 }
4457 tmp_start_idx = cur_idx;
4458 abort_run = TRUE;
4459 continue;
4460 }
4461 if (! disconnected) {
4462 if (m1->pmapped)
4463 refmod = pmap_disconnect(m1->phys_page);
4464 else
4465 refmod = 0;
4466 }
4467
4468 /* copy the page's contents */
4469 pmap_copy_page(m1->phys_page, m2->phys_page);
4470 /* copy the page's state */
4471 assert(!VM_PAGE_WIRED(m1));
4472 assert(!m1->free);
4473 assert(!m1->pageout_queue);
4474 assert(!m1->laundry);
4475 m2->reference = m1->reference;
4476 assert(!m1->gobbled);
4477 assert(!m1->private);
4478 m2->no_cache = m1->no_cache;
4479 m2->xpmapped = 0;
4480 assert(!m1->busy);
4481 assert(!m1->wanted);
4482 assert(!m1->fictitious);
4483 m2->pmapped = m1->pmapped; /* should flush cache ? */
4484 m2->wpmapped = m1->wpmapped;
4485 assert(!m1->pageout);
4486 m2->absent = m1->absent;
4487 m2->error = m1->error;
4488 m2->dirty = m1->dirty;
4489 assert(!m1->cleaning);
4490 m2->precious = m1->precious;
4491 m2->clustered = m1->clustered;
4492 assert(!m1->overwriting);
4493 m2->restart = m1->restart;
4494 m2->unusual = m1->unusual;
4495 m2->encrypted = m1->encrypted;
4496 assert(!m1->encrypted_cleaning);
4497 m2->cs_validated = m1->cs_validated;
4498 m2->cs_tainted = m1->cs_tainted;
4499 m2->cs_nx = m1->cs_nx;
4500
4501 /*
4502 * If m1 had really been reusable,
4503 * we would have just stolen it, so
4504 * let's not propagate its "reusable"
4505 * bit and assert that m2 is not
4506 * marked as "reusable".
4507 */
4508 // m2->reusable = m1->reusable;
4509 assert(!m2->reusable);
4510
4511 assert(!m1->lopage);
4512 m2->slid = m1->slid;
4513 m2->compressor = m1->compressor;
4514
4515 /*
4516 * page may need to be flushed if
4517 * it is marshalled into a UPL
4518 * that is going to be used by a device
4519 * that doesn't support coherency
4520 */
4521 m2->written_by_kernel = TRUE;
4522
4523 /*
4524 * make sure we clear the ref/mod state
4525 * from the pmap layer... else we risk
4526 * inheriting state from the last time
4527 * this page was used...
4528 */
4529 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4530
4531 if (refmod & VM_MEM_REFERENCED)
4532 m2->reference = TRUE;
4533 if (refmod & VM_MEM_MODIFIED) {
4534 SET_PAGE_DIRTY(m2, TRUE);
4535 }
4536 offset = m1->offset;
4537
4538 /*
4539 * completely cleans up the state
4540 * of the page so that it is ready
4541 * to be put onto the free list, or
4542 * for this purpose it looks like it
4543 * just came off of the free list
4544 */
4545 vm_page_free_prepare(m1);
4546
4547 /*
4548 * now put the substitute page
4549 * on the object
4550 */
4551 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4552
4553 if (m2->compressor) {
4554 m2->pmapped = TRUE;
4555 m2->wpmapped = TRUE;
4556
4557 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4558 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4559 #if MACH_ASSERT
4560 compressed_pages++;
4561 #endif
4562 } else {
4563 if (m2->reference)
4564 vm_page_activate(m2);
4565 else
4566 vm_page_deactivate(m2);
4567 }
4568 PAGE_WAKEUP_DONE(m2);
4569
4570 } else {
4571 assert(!m1->compressor);
4572
4573 /*
4574 * completely cleans up the state
4575 * of the page so that it is ready
4576 * to be put onto the free list, or
4577 * for this purpose it looks like it
4578 * just came off of the free list
4579 */
4580 vm_page_free_prepare(m1);
4581 }
4582 #if MACH_ASSERT
4583 stolen_pages++;
4584 #endif
4585 }
4586 m1->pageq.next = (queue_entry_t) m;
4587 m1->pageq.prev = NULL;
4588 m = m1;
4589 }
4590 if (locked_object) {
4591 vm_object_unlock(locked_object);
4592 locked_object = VM_OBJECT_NULL;
4593 }
4594
4595 if (abort_run == TRUE) {
4596 if (m != VM_PAGE_NULL) {
4597 vm_page_free_list(m, FALSE);
4598 }
4599 #if MACH_ASSERT
4600 dumped_run++;
4601 #endif
4602 /*
4603 * want the index of the last
4604 * page in this run that was
4605 * successfully 'stolen', so back
4606 * it up 1 for the auto-decrement on use
4607 * and 1 more to bump back over this page
4608 */
4609 page_idx = tmp_start_idx + 2;
4610 if (page_idx >= vm_pages_count) {
4611 if (wrapped)
4612 goto done_scanning;
4613 page_idx = last_idx = 0;
4614 wrapped = TRUE;
4615 }
4616 abort_run = FALSE;
4617
4618 /*
4619 * We didn't find a contiguous range but we didn't
4620 * start from the very first page.
4621 * Start again from the very first page.
4622 */
4623 RESET_STATE_OF_RUN();
4624
4625 if( flags & KMA_LOMEM)
4626 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4627 else
4628 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4629
4630 last_idx = page_idx;
4631
4632 lck_mtx_lock(&vm_page_queue_free_lock);
4633 /*
4634 * reset our free page limit since we
4635 * dropped the lock protecting the vm_page_free_queue
4636 */
4637 free_available = vm_page_free_count - vm_page_free_reserved;
4638 goto retry;
4639 }
4640
4641 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4642
4643 if (wire == TRUE)
4644 m1->wire_count++;
4645 else
4646 m1->gobbled = TRUE;
4647 }
4648 if (wire == FALSE)
4649 vm_page_gobble_count += npages;
4650
4651 /*
4652 * gobbled pages are also counted as wired pages
4653 */
4654 vm_page_wire_count += npages;
4655
4656 assert(vm_page_verify_contiguous(m, npages));
4657 }
4658 done_scanning:
4659 PAGE_REPLACEMENT_ALLOWED(FALSE);
4660
4661 vm_page_unlock_queues();
4662
4663 #if DEBUG
4664 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4665
4666 tv_end_sec -= tv_start_sec;
4667 if (tv_end_usec < tv_start_usec) {
4668 tv_end_sec--;
4669 tv_end_usec += 1000000;
4670 }
4671 tv_end_usec -= tv_start_usec;
4672 if (tv_end_usec >= 1000000) {
4673 tv_end_sec++;
4674 tv_end_usec -= 1000000;
4675 }
4676 if (vm_page_find_contig_debug) {
4677 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4678 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4679 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4680 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
4681 }
4682
4683 #endif
4684 #if MACH_ASSERT
4685 vm_page_verify_free_lists();
4686 #endif
4687 return m;
4688 }
4689
4690 /*
4691 * Allocate a list of contiguous, wired pages.
4692 */
4693 kern_return_t
4694 cpm_allocate(
4695 vm_size_t size,
4696 vm_page_t *list,
4697 ppnum_t max_pnum,
4698 ppnum_t pnum_mask,
4699 boolean_t wire,
4700 int flags)
4701 {
4702 vm_page_t pages;
4703 unsigned int npages;
4704
4705 if (size % PAGE_SIZE != 0)
4706 return KERN_INVALID_ARGUMENT;
4707
4708 npages = (unsigned int) (size / PAGE_SIZE);
4709 if (npages != size / PAGE_SIZE) {
4710 /* 32-bit overflow */
4711 return KERN_INVALID_ARGUMENT;
4712 }
4713
4714 /*
4715 * Obtain a pointer to a subset of the free
4716 * list large enough to satisfy the request;
4717 * the region will be physically contiguous.
4718 */
4719 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4720
4721 if (pages == VM_PAGE_NULL)
4722 return KERN_NO_SPACE;
4723 /*
4724 * determine need for wakeups
4725 */
4726 if ((vm_page_free_count < vm_page_free_min) ||
4727 ((vm_page_free_count < vm_page_free_target) &&
4728 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4729 thread_wakeup((event_t) &vm_page_free_wanted);
4730
4731 VM_CHECK_MEMORYSTATUS;
4732
4733 /*
4734 * The CPM pages should now be available and
4735 * ordered by ascending physical address.
4736 */
4737 assert(vm_page_verify_contiguous(pages, npages));
4738
4739 *list = pages;
4740 return KERN_SUCCESS;
4741 }
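
/*
 * Illustrative sketch only: a hypothetical caller of cpm_allocate().
 * The function and constants used are the ones defined in this file;
 * the example routine itself is not part of the build.
 */
#if 0
static kern_return_t
example_cpm_usage(void)
{
	vm_page_t	pages;
	kern_return_t	kr;

	kr = cpm_allocate(16 * PAGE_SIZE,	/* size: must be a page multiple */
			  &pages,		/* out: list of gobbled/wired pages */
			  0,			/* max_pnum: 0 == no upper bound */
			  0,			/* pnum_mask: no alignment constraint */
			  TRUE,			/* wire the pages */
			  0);			/* flags */
	if (kr != KERN_SUCCESS)
		return kr;			/* e.g. KERN_NO_SPACE */

	/* 'pages' is ordered by ascending physical address */
	return KERN_SUCCESS;
}
#endif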
4742
4743
4744 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4745
4746 /*
4747 * when working on a 'run' of pages, it is necessary to hold
4748 * the vm_page_queue_lock (a hot global lock) for certain operations
4749 * on the page... however, the majority of the work can be done
4750 * while merely holding the object lock... in fact there are certain
4751 * collections of pages that don't require any work brokered by the
4752 * vm_page_queue_lock... to mitigate the time spent behind the global
4753 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4754 * while doing all of the work that doesn't require the vm_page_queue_lock...
4755 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4756 * necessary work for each page... we will grab the busy bit on the page
4757 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4758 * if it can't immediately take the vm_page_queue_lock in order to compete
4759 * for the locks in the same order that vm_pageout_scan takes them.
4760 * the operation names are modeled after the names of the routines that
4761 * need to be called in order to make the changes very obvious in the
4762 * original loop
4763 */
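
/*
 * Illustrative sketch only (not taken from this file): the 2 pass pattern
 * described above, as a hypothetical caller might use it. Only the
 * vm_page_delayed_work structure, the DW_* masks, vm_max_delayed_work_limit
 * and vm_page_do_delayed_work() are real; the surrounding code is a
 * placeholder.
 */
#if 0
	struct vm_page_delayed_work	dw_array[DEFAULT_DELAYED_WORK_LIMIT];
	struct vm_page_delayed_work	*dwp = &dw_array[0];
	int				dw_count = 0;
	vm_page_t			m;

	vm_object_lock(object);

	queue_iterate(&object->memq, m, vm_page_t, listq) {
		/* work that only needs the object lock goes here... */

		dwp->dw_m = m;
		dwp->dw_mask = DW_vm_page_activate | DW_PAGE_WAKEUP;
		dwp++;

		if (++dw_count >= (int)vm_max_delayed_work_limit) {
			/* takes the vm_page_queue_lock once for the whole batch */
			vm_page_do_delayed_work(object, &dw_array[0], dw_count);

			dwp = &dw_array[0];
			dw_count = 0;
		}
	}
	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);

	vm_object_unlock(object);
#endif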
4764
4765 void
4766 vm_page_do_delayed_work(
4767 vm_object_t object,
4768 struct vm_page_delayed_work *dwp,
4769 int dw_count)
4770 {
4771 int j;
4772 vm_page_t m;
4773 vm_page_t local_free_q = VM_PAGE_NULL;
4774
4775 /*
4776 * pageout_scan takes the vm_page_lock_queues first
4777 * then tries for the object lock... to avoid what
4778 * is effectively a lock inversion, we'll go to the
4779 * trouble of taking them in that same order... otherwise
4780 * if this object contains the majority of the pages resident
4781 * in the UBC (or a small set of large objects actively being
4782 * worked on contain the majority of the pages), we could
4783 * cause the pageout_scan thread to 'starve' in its attempt
4784 * to find pages to move to the free queue, since it has to
4785 * successfully acquire the object lock of any candidate page
4786 * before it can steal/clean it.
4787 */
4788 if (!vm_page_trylockspin_queues()) {
4789 vm_object_unlock(object);
4790
4791 vm_page_lockspin_queues();
4792
4793 for (j = 0; ; j++) {
4794 if (!vm_object_lock_avoid(object) &&
4795 _vm_object_lock_try(object))
4796 break;
4797 vm_page_unlock_queues();
4798 mutex_pause(j);
4799 vm_page_lockspin_queues();
4800 }
4801 }
4802 for (j = 0; j < dw_count; j++, dwp++) {
4803
4804 m = dwp->dw_m;
4805
4806 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4807 vm_pageout_throttle_up(m);
4808 #if CONFIG_PHANTOM_CACHE
4809 if (dwp->dw_mask & DW_vm_phantom_cache_update)
4810 vm_phantom_cache_update(m);
4811 #endif
4812 if (dwp->dw_mask & DW_vm_page_wire)
4813 vm_page_wire(m);
4814 else if (dwp->dw_mask & DW_vm_page_unwire) {
4815 boolean_t queueit;
4816
4817 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
4818
4819 vm_page_unwire(m, queueit);
4820 }
4821 if (dwp->dw_mask & DW_vm_page_free) {
4822 vm_page_free_prepare_queues(m);
4823
4824 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4825 /*
4826 * Add this page to our list of reclaimed pages,
4827 * to be freed later.
4828 */
4829 m->pageq.next = (queue_entry_t) local_free_q;
4830 local_free_q = m;
4831 } else {
4832 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4833 vm_page_deactivate_internal(m, FALSE);
4834 else if (dwp->dw_mask & DW_vm_page_activate) {
4835 if (m->active == FALSE) {
4836 vm_page_activate(m);
4837 }
4838 }
4839 else if (dwp->dw_mask & DW_vm_page_speculate)
4840 vm_page_speculate(m, TRUE);
4841 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4842 /*
4843 * if we didn't hold the object lock and did this,
4844 * we might disconnect the page, then someone might
4845 * soft fault it back in, then we would put it on the
4846 * cleaned queue, and so we would have a referenced (maybe even dirty)
4847 * page on that queue, which we don't want
4848 */
4849 int refmod_state = pmap_disconnect(m->phys_page);
4850
4851 if ((refmod_state & VM_MEM_REFERENCED)) {
4852 /*
4853 * this page has been touched since it got cleaned; let's activate it
4854 * if it hasn't already been
4855 */
4856 vm_pageout_enqueued_cleaned++;
4857 vm_pageout_cleaned_reactivated++;
4858 vm_pageout_cleaned_commit_reactivated++;
4859
4860 if (m->active == FALSE)
4861 vm_page_activate(m);
4862 } else {
4863 m->reference = FALSE;
4864 vm_page_enqueue_cleaned(m);
4865 }
4866 }
4867 else if (dwp->dw_mask & DW_vm_page_lru)
4868 vm_page_lru(m);
4869 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4870 if ( !m->pageout_queue)
4871 VM_PAGE_QUEUES_REMOVE(m);
4872 }
4873 if (dwp->dw_mask & DW_set_reference)
4874 m->reference = TRUE;
4875 else if (dwp->dw_mask & DW_clear_reference)
4876 m->reference = FALSE;
4877
4878 if (dwp->dw_mask & DW_move_page) {
4879 if ( !m->pageout_queue) {
4880 VM_PAGE_QUEUES_REMOVE(m);
4881
4882 assert(m->object != kernel_object);
4883
4884 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4885 }
4886 }
4887 if (dwp->dw_mask & DW_clear_busy)
4888 m->busy = FALSE;
4889
4890 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4891 PAGE_WAKEUP(m);
4892 }
4893 }
4894 vm_page_unlock_queues();
4895
4896 if (local_free_q)
4897 vm_page_free_list(local_free_q, TRUE);
4898
4899 VM_CHECK_MEMORYSTATUS;
4900
4901 }
4902
4903 kern_return_t
4904 vm_page_alloc_list(
4905 int page_count,
4906 int flags,
4907 vm_page_t *list)
4908 {
4909 vm_page_t lo_page_list = VM_PAGE_NULL;
4910 vm_page_t mem;
4911 int i;
4912
4913 if ( !(flags & KMA_LOMEM))
4914 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4915
4916 for (i = 0; i < page_count; i++) {
4917
4918 mem = vm_page_grablo();
4919
4920 if (mem == VM_PAGE_NULL) {
4921 if (lo_page_list)
4922 vm_page_free_list(lo_page_list, FALSE);
4923
4924 *list = VM_PAGE_NULL;
4925
4926 return (KERN_RESOURCE_SHORTAGE);
4927 }
4928 mem->pageq.next = (queue_entry_t) lo_page_list;
4929 lo_page_list = mem;
4930 }
4931 *list = lo_page_list;
4932
4933 return (KERN_SUCCESS);
4934 }
4935
4936 void
4937 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4938 {
4939 page->offset = offset;
4940 }
4941
4942 vm_page_t
4943 vm_page_get_next(vm_page_t page)
4944 {
4945 return ((vm_page_t) page->pageq.next);
4946 }
4947
4948 vm_object_offset_t
4949 vm_page_get_offset(vm_page_t page)
4950 {
4951 return (page->offset);
4952 }
4953
4954 ppnum_t
4955 vm_page_get_phys_page(vm_page_t page)
4956 {
4957 return (page->phys_page);
4958 }
4959
4960
4961 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4962
4963 #if HIBERNATION
4964
4965 static vm_page_t hibernate_gobble_queue;
4966
4967 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4968
4969 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4970 static int hibernate_flush_dirty_pages(int);
4971 static int hibernate_flush_queue(queue_head_t *, int);
4972
4973 void hibernate_flush_wait(void);
4974 void hibernate_mark_in_progress(void);
4975 void hibernate_clear_in_progress(void);
4976
4977 void hibernate_free_range(int, int);
4978 void hibernate_hash_insert_page(vm_page_t);
4979 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
4980 void hibernate_rebuild_vm_structs(void);
4981 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
4982 ppnum_t hibernate_lookup_paddr(unsigned int);
4983
4984 struct hibernate_statistics {
4985 int hibernate_considered;
4986 int hibernate_reentered_on_q;
4987 int hibernate_found_dirty;
4988 int hibernate_skipped_cleaning;
4989 int hibernate_skipped_transient;
4990 int hibernate_skipped_precious;
4991 int hibernate_skipped_external;
4992 int hibernate_queue_nolock;
4993 int hibernate_queue_paused;
4994 int hibernate_throttled;
4995 int hibernate_throttle_timeout;
4996 int hibernate_drained;
4997 int hibernate_drain_timeout;
4998 int cd_lock_failed;
4999 int cd_found_precious;
5000 int cd_found_wired;
5001 int cd_found_busy;
5002 int cd_found_unusual;
5003 int cd_found_cleaning;
5004 int cd_found_laundry;
5005 int cd_found_dirty;
5006 int cd_found_xpmapped;
5007 int cd_skipped_xpmapped;
5008 int cd_local_free;
5009 int cd_total_free;
5010 int cd_vm_page_wire_count;
5011 int cd_vm_struct_pages_unneeded;
5012 int cd_pages;
5013 int cd_discarded;
5014 int cd_count_wire;
5015 } hibernate_stats;
5016
5017
5018 /*
5019 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
5020 * so that we don't overrun the estimated image size, which would
5021 * result in a hibernation failure.
5022 */
5023 #define HIBERNATE_XPMAPPED_LIMIT 40000
5024
5025
5026 static int
5027 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
5028 {
5029 wait_result_t wait_result;
5030
5031 vm_page_lock_queues();
5032
5033 while ( !queue_empty(&q->pgo_pending) ) {
5034
5035 q->pgo_draining = TRUE;
5036
5037 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
5038
5039 vm_page_unlock_queues();
5040
5041 wait_result = thread_block(THREAD_CONTINUE_NULL);
5042
5043 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
5044 hibernate_stats.hibernate_drain_timeout++;
5045
5046 if (q == &vm_pageout_queue_external)
5047 return (0);
5048
5049 return (1);
5050 }
5051 vm_page_lock_queues();
5052
5053 hibernate_stats.hibernate_drained++;
5054 }
5055 vm_page_unlock_queues();
5056
5057 return (0);
5058 }
5059
5060
5061 boolean_t hibernate_skip_external = FALSE;
5062
5063 static int
5064 hibernate_flush_queue(queue_head_t *q, int qcount)
5065 {
5066 vm_page_t m;
5067 vm_object_t l_object = NULL;
5068 vm_object_t m_object = NULL;
5069 int refmod_state = 0;
5070 int try_failed_count = 0;
5071 int retval = 0;
5072 int current_run = 0;
5073 struct vm_pageout_queue *iq;
5074 struct vm_pageout_queue *eq;
5075 struct vm_pageout_queue *tq;
5076
5077
5078 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
5079
5080 iq = &vm_pageout_queue_internal;
5081 eq = &vm_pageout_queue_external;
5082
5083 vm_page_lock_queues();
5084
5085 while (qcount && !queue_empty(q)) {
5086
5087 if (current_run++ == 1000) {
5088 if (hibernate_should_abort()) {
5089 retval = 1;
5090 break;
5091 }
5092 current_run = 0;
5093 }
5094
5095 m = (vm_page_t) queue_first(q);
5096 m_object = m->object;
5097
5098 /*
5099 * check to see if we currently are working
5100 * with the same object... if so, we've
5101 * already got the lock
5102 */
5103 if (m_object != l_object) {
5104 /*
5105 * the object associated with candidate page is
5106 * different from the one we were just working
5107 * with... dump the lock if we still own it
5108 */
5109 if (l_object != NULL) {
5110 vm_object_unlock(l_object);
5111 l_object = NULL;
5112 }
5113 /*
5114 * Try to lock object; since we've already got the
5115 * page queues lock, we can only 'try' for this one.
5116 * if the 'try' fails, we need to do a mutex_pause
5117 * to allow the owner of the object lock a chance to
5118 * run...
5119 */
5120 if ( !vm_object_lock_try_scan(m_object)) {
5121
5122 if (try_failed_count > 20) {
5123 hibernate_stats.hibernate_queue_nolock++;
5124
5125 goto reenter_pg_on_q;
5126 }
5127
5128 vm_page_unlock_queues();
5129 mutex_pause(try_failed_count++);
5130 vm_page_lock_queues();
5131
5132 hibernate_stats.hibernate_queue_paused++;
5133 continue;
5134 } else {
5135 l_object = m_object;
5136 }
5137 }
5138 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
5139 /*
5140 * page is not to be cleaned
5141 * put it back on the head of its queue
5142 */
5143 if (m->cleaning)
5144 hibernate_stats.hibernate_skipped_cleaning++;
5145 else
5146 hibernate_stats.hibernate_skipped_transient++;
5147
5148 goto reenter_pg_on_q;
5149 }
5150 if (m_object->copy == VM_OBJECT_NULL) {
5151 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
5152 /*
5153 * let the normal hibernate image path
5154 * deal with these
5155 */
5156 goto reenter_pg_on_q;
5157 }
5158 }
5159 if ( !m->dirty && m->pmapped) {
5160 refmod_state = pmap_get_refmod(m->phys_page);
5161
5162 if ((refmod_state & VM_MEM_MODIFIED)) {
5163 SET_PAGE_DIRTY(m, FALSE);
5164 }
5165 } else
5166 refmod_state = 0;
5167
5168 if ( !m->dirty) {
5169 /*
5170 * page is not to be cleaned
5171 * put it back on the head of its queue
5172 */
5173 if (m->precious)
5174 hibernate_stats.hibernate_skipped_precious++;
5175
5176 goto reenter_pg_on_q;
5177 }
5178
5179 if (hibernate_skip_external == TRUE && !m_object->internal) {
5180
5181 hibernate_stats.hibernate_skipped_external++;
5182
5183 goto reenter_pg_on_q;
5184 }
5185 tq = NULL;
5186
5187 if (m_object->internal) {
5188 if (VM_PAGE_Q_THROTTLED(iq))
5189 tq = iq;
5190 } else if (VM_PAGE_Q_THROTTLED(eq))
5191 tq = eq;
5192
5193 if (tq != NULL) {
5194 wait_result_t wait_result;
5195 int wait_count = 5;
5196
5197 if (l_object != NULL) {
5198 vm_object_unlock(l_object);
5199 l_object = NULL;
5200 }
5201
5202 while (retval == 0) {
5203
5204 tq->pgo_throttled = TRUE;
5205
5206 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
5207
5208 vm_page_unlock_queues();
5209
5210 wait_result = thread_block(THREAD_CONTINUE_NULL);
5211
5212 vm_page_lock_queues();
5213
5214 if (wait_result != THREAD_TIMED_OUT)
5215 break;
5216 if (!VM_PAGE_Q_THROTTLED(tq))
5217 break;
5218
5219 if (hibernate_should_abort())
5220 retval = 1;
5221
5222 if (--wait_count == 0) {
5223
5224 hibernate_stats.hibernate_throttle_timeout++;
5225
5226 if (tq == eq) {
5227 hibernate_skip_external = TRUE;
5228 break;
5229 }
5230 retval = 1;
5231 }
5232 }
5233 if (retval)
5234 break;
5235
5236 hibernate_stats.hibernate_throttled++;
5237
5238 continue;
5239 }
5240 /*
5241 * we've already factored out pages in the laundry which
5242 * means this page can't be on the pageout queue so it's
5243 * safe to do the VM_PAGE_QUEUES_REMOVE
5244 */
5245 assert(!m->pageout_queue);
5246
5247 VM_PAGE_QUEUES_REMOVE(m);
5248
5249 if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
5250 pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
5251
5252 vm_pageout_cluster(m, FALSE);
5253
5254 hibernate_stats.hibernate_found_dirty++;
5255
5256 goto next_pg;
5257
5258 reenter_pg_on_q:
5259 queue_remove(q, m, vm_page_t, pageq);
5260 queue_enter(q, m, vm_page_t, pageq);
5261
5262 hibernate_stats.hibernate_reentered_on_q++;
5263 next_pg:
5264 hibernate_stats.hibernate_considered++;
5265
5266 qcount--;
5267 try_failed_count = 0;
5268 }
5269 if (l_object != NULL) {
5270 vm_object_unlock(l_object);
5271 l_object = NULL;
5272 }
5273
5274 vm_page_unlock_queues();
5275
5276 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5277
5278 return (retval);
5279 }
5280
5281
5282 static int
5283 hibernate_flush_dirty_pages(int pass)
5284 {
5285 struct vm_speculative_age_q *aq;
5286 uint32_t i;
5287
5288 if (vm_page_local_q) {
5289 for (i = 0; i < vm_page_local_q_count; i++)
5290 vm_page_reactivate_local(i, TRUE, FALSE);
5291 }
5292
5293 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5294 int qcount;
5295 vm_page_t m;
5296
5297 aq = &vm_page_queue_speculative[i];
5298
5299 if (queue_empty(&aq->age_q))
5300 continue;
5301 qcount = 0;
5302
5303 vm_page_lockspin_queues();
5304
5305 queue_iterate(&aq->age_q,
5306 m,
5307 vm_page_t,
5308 pageq)
5309 {
5310 qcount++;
5311 }
5312 vm_page_unlock_queues();
5313
5314 if (qcount) {
5315 if (hibernate_flush_queue(&aq->age_q, qcount))
5316 return (1);
5317 }
5318 }
5319 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
5320 return (1);
5321 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5322 return (1);
5323 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
5324 return (1);
5325 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5326 return (1);
5327
5328 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5329 vm_compressor_record_warmup_start();
5330
5331 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5332 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5333 vm_compressor_record_warmup_end();
5334 return (1);
5335 }
5336 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5337 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5338 vm_compressor_record_warmup_end();
5339 return (1);
5340 }
5341 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5342 vm_compressor_record_warmup_end();
5343
5344 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5345 return (1);
5346
5347 return (0);
5348 }
5349
5350
5351 void
5352 hibernate_reset_stats()
5353 {
5354 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5355 }
5356
5357
5358 int
5359 hibernate_flush_memory()
5360 {
5361 int retval;
5362
5363 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5364
5365 hibernate_cleaning_in_progress = TRUE;
5366 hibernate_skip_external = FALSE;
5367
5368 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5369
5370 if (COMPRESSED_PAGER_IS_ACTIVE) {
5371
5372 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5373
5374 vm_compressor_flush();
5375
5376 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5377 }
5378 if (consider_buffer_cache_collect != NULL) {
5379 unsigned int orig_wire_count;
5380
5381 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5382 orig_wire_count = vm_page_wire_count;
5383
5384 (void)(*consider_buffer_cache_collect)(1);
5385 consider_zone_gc(TRUE);
5386
5387 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5388
5389 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
5390 }
5391 }
5392 hibernate_cleaning_in_progress = FALSE;
5393
5394 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5395
5396 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5397 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5398
5399
5400 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5401 hibernate_stats.hibernate_considered,
5402 hibernate_stats.hibernate_reentered_on_q,
5403 hibernate_stats.hibernate_found_dirty);
5404 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
5405 hibernate_stats.hibernate_skipped_cleaning,
5406 hibernate_stats.hibernate_skipped_transient,
5407 hibernate_stats.hibernate_skipped_precious,
5408 hibernate_stats.hibernate_skipped_external,
5409 hibernate_stats.hibernate_queue_nolock);
5410 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5411 hibernate_stats.hibernate_queue_paused,
5412 hibernate_stats.hibernate_throttled,
5413 hibernate_stats.hibernate_throttle_timeout,
5414 hibernate_stats.hibernate_drained,
5415 hibernate_stats.hibernate_drain_timeout);
5416
5417 return (retval);
5418 }
5419
5420
5421 static void
5422 hibernate_page_list_zero(hibernate_page_list_t *list)
5423 {
5424 uint32_t bank;
5425 hibernate_bitmap_t * bitmap;
5426
5427 bitmap = &list->bank_bitmap[0];
5428 for (bank = 0; bank < list->bank_count; bank++)
5429 {
5430 uint32_t last_bit;
5431
5432 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5433 // set out-of-bound bits at end of bitmap.
5434 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5435 if (last_bit)
5436 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
5437
5438 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5439 }
5440 }
5441
5442 void
5443 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
5444 {
5445 uint32_t i;
5446 vm_page_t m;
5447 uint64_t start, end, timeout, nsec;
5448 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
5449 clock_get_uptime(&start);
5450
5451 for (i = 0; i < gobble_count; i++)
5452 {
5453 while (VM_PAGE_NULL == (m = vm_page_grab()))
5454 {
5455 clock_get_uptime(&end);
5456 if (end >= timeout)
5457 break;
5458 VM_PAGE_WAIT();
5459 }
5460 if (!m)
5461 break;
5462 m->busy = FALSE;
5463 vm_page_gobble(m);
5464
5465 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
5466 hibernate_gobble_queue = m;
5467 }
5468
5469 clock_get_uptime(&end);
5470 absolutetime_to_nanoseconds(end - start, &nsec);
5471 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
5472 }
5473
5474 void
5475 hibernate_free_gobble_pages(void)
5476 {
5477 vm_page_t m, next;
5478 uint32_t count = 0;
5479
5480 m = (vm_page_t) hibernate_gobble_queue;
5481 while(m)
5482 {
5483 next = (vm_page_t) m->pageq.next;
5484 vm_page_free(m);
5485 count++;
5486 m = next;
5487 }
5488 hibernate_gobble_queue = VM_PAGE_NULL;
5489
5490 if (count)
5491 HIBLOG("Freed %d pages\n", count);
5492 }
5493
5494 static boolean_t
5495 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
5496 {
5497 vm_object_t object = NULL;
5498 int refmod_state;
5499 boolean_t discard = FALSE;
5500
5501 do
5502 {
5503 if (m->private)
5504 panic("hibernate_consider_discard: private");
5505
5506 if (!vm_object_lock_try(m->object)) {
5507 if (!preflight) hibernate_stats.cd_lock_failed++;
5508 break;
5509 }
5510 object = m->object;
5511
5512 if (VM_PAGE_WIRED(m)) {
5513 if (!preflight) hibernate_stats.cd_found_wired++;
5514 break;
5515 }
5516 if (m->precious) {
5517 if (!preflight) hibernate_stats.cd_found_precious++;
5518 break;
5519 }
5520 if (m->busy || !object->alive) {
5521 /*
5522 * Somebody is playing with this page.
5523 */
5524 if (!preflight) hibernate_stats.cd_found_busy++;
5525 break;
5526 }
5527 if (m->absent || m->unusual || m->error) {
5528 /*
5529 * If it's unusual in any way, ignore it
5530 */
5531 if (!preflight) hibernate_stats.cd_found_unusual++;
5532 break;
5533 }
5534 if (m->cleaning) {
5535 if (!preflight) hibernate_stats.cd_found_cleaning++;
5536 break;
5537 }
5538 if (m->laundry) {
5539 if (!preflight) hibernate_stats.cd_found_laundry++;
5540 break;
5541 }
5542 if (!m->dirty)
5543 {
5544 refmod_state = pmap_get_refmod(m->phys_page);
5545
5546 if (refmod_state & VM_MEM_REFERENCED)
5547 m->reference = TRUE;
5548 if (refmod_state & VM_MEM_MODIFIED) {
5549 SET_PAGE_DIRTY(m, FALSE);
5550 }
5551 }
5552
5553 /*
5554 * If it's clean or purgeable we can discard the page on wakeup.
5555 */
5556 discard = (!m->dirty)
5557 || (VM_PURGABLE_VOLATILE == object->purgable)
5558 || (VM_PURGABLE_EMPTY == object->purgable);
5559
5560
5561 if (discard == FALSE) {
5562 if (!preflight)
5563 hibernate_stats.cd_found_dirty++;
5564 } else if (m->xpmapped && m->reference && !object->internal) {
5565 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
5566 if (!preflight)
5567 hibernate_stats.cd_found_xpmapped++;
5568 discard = FALSE;
5569 } else {
5570 if (!preflight)
5571 hibernate_stats.cd_skipped_xpmapped++;
5572 }
5573 }
5574 }
5575 while (FALSE);
5576
5577 if (object)
5578 vm_object_unlock(object);
5579
5580 return (discard);
5581 }
5582
5583
5584 static void
5585 hibernate_discard_page(vm_page_t m)
5586 {
5587 if (m->absent || m->unusual || m->error)
5588 /*
5589 * If it's unusual in any way, ignore
5590 */
5591 return;
5592
5593 #if MACH_ASSERT || DEBUG
5594 vm_object_t object = m->object;
5595 if (!vm_object_lock_try(m->object))
5596 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5597 #else
5598 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5599 makes sure these locks are uncontended before sleep */
5600 #endif /* MACH_ASSERT || DEBUG */
5601
5602 if (m->pmapped == TRUE)
5603 {
5604 __unused int refmod_state = pmap_disconnect(m->phys_page);
5605 }
5606
5607 if (m->laundry)
5608 panic("hibernate_discard_page(%p) laundry", m);
5609 if (m->private)
5610 panic("hibernate_discard_page(%p) private", m);
5611 if (m->fictitious)
5612 panic("hibernate_discard_page(%p) fictitious", m);
5613
5614 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5615 {
5616 /* object should be on a queue */
5617 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5618 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5619 assert(old_queue);
5620 if (m->object->purgeable_when_ripe) {
5621 vm_purgeable_token_delete_first(old_queue);
5622 }
5623 m->object->purgable = VM_PURGABLE_EMPTY;
5624
5625 /*
5626 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
5627 * accounted in the "volatile" ledger, so no change here.
5628 * We have to update vm_page_purgeable_count, though, since we're
5629 * effectively purging this object.
5630 */
5631 unsigned int delta;
5632 assert(m->object->resident_page_count >= m->object->wired_page_count);
5633 delta = (m->object->resident_page_count - m->object->wired_page_count);
5634 assert(vm_page_purgeable_count >= delta);
5635 assert(delta > 0);
5636 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
5637 }
5638
5639 vm_page_free(m);
5640
5641 #if MACH_ASSERT || DEBUG
5642 vm_object_unlock(object);
5643 #endif /* MACH_ASSERT || DEBUG */
5644 }
5645
5646 /*
5647 Grab locks for hibernate_page_list_setall()
5648 */
5649 void
5650 hibernate_vm_lock_queues(void)
5651 {
5652 vm_object_lock(compressor_object);
5653 vm_page_lock_queues();
5654 lck_mtx_lock(&vm_page_queue_free_lock);
5655
5656 if (vm_page_local_q) {
5657 uint32_t i;
5658 for (i = 0; i < vm_page_local_q_count; i++) {
5659 struct vpl *lq;
5660 lq = &vm_page_local_q[i].vpl_un.vpl;
5661 VPL_LOCK(&lq->vpl_lock);
5662 }
5663 }
5664 }
5665
5666 void
5667 hibernate_vm_unlock_queues(void)
5668 {
5669 if (vm_page_local_q) {
5670 uint32_t i;
5671 for (i = 0; i < vm_page_local_q_count; i++) {
5672 struct vpl *lq;
5673 lq = &vm_page_local_q[i].vpl_un.vpl;
5674 VPL_UNLOCK(&lq->vpl_lock);
5675 }
5676 }
5677 lck_mtx_unlock(&vm_page_queue_free_lock);
5678 vm_page_unlock_queues();
5679 vm_object_unlock(compressor_object);
5680 }
5681
5682 /*
5683 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
5684 pages known to VM to not need saving are subtracted.
5685 Wired pages to be saved are present in page_list_wired, pageable in page_list.
5686 */
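
/*
 * For example (illustration only): a page VM knows does not need to be
 * preserved, with physical page number 'pn', is subtracted by setting its
 * bit in both bitmaps and dropping it from the running counts:
 *
 *	hibernate_page_bitset(page_list, TRUE, pn);
 *	hibernate_page_bitset(page_list_wired, TRUE, pn);
 *	pages--;
 *	count_wire--;
 *
 * This is the pattern applied to the free, local free and gobbled queues
 * in hibernate_page_list_setall() below.
 */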
5687
5688 void
5689 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5690 hibernate_page_list_t * page_list_wired,
5691 hibernate_page_list_t * page_list_pal,
5692 boolean_t preflight,
5693 boolean_t will_discard,
5694 uint32_t * pagesOut)
5695 {
5696 uint64_t start, end, nsec;
5697 vm_page_t m;
5698 vm_page_t next;
5699 uint32_t pages = page_list->page_count;
5700 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
5701 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5702 uint32_t count_wire = pages;
5703 uint32_t count_discard_active = 0;
5704 uint32_t count_discard_inactive = 0;
5705 uint32_t count_discard_cleaned = 0;
5706 uint32_t count_discard_purgeable = 0;
5707 uint32_t count_discard_speculative = 0;
5708 uint32_t count_discard_vm_struct_pages = 0;
5709 uint32_t i;
5710 uint32_t bank;
5711 hibernate_bitmap_t * bitmap;
5712 hibernate_bitmap_t * bitmap_wired;
5713 boolean_t discard_all;
5714 boolean_t discard;
5715
5716 HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
5717
5718 if (preflight) {
5719 page_list = NULL;
5720 page_list_wired = NULL;
5721 page_list_pal = NULL;
5722 discard_all = FALSE;
5723 } else {
5724 discard_all = will_discard;
5725 }
5726
5727 #if MACH_ASSERT || DEBUG
5728 if (!preflight)
5729 {
5730 vm_page_lock_queues();
5731 if (vm_page_local_q) {
5732 for (i = 0; i < vm_page_local_q_count; i++) {
5733 struct vpl *lq;
5734 lq = &vm_page_local_q[i].vpl_un.vpl;
5735 VPL_LOCK(&lq->vpl_lock);
5736 }
5737 }
5738 }
5739 #endif /* MACH_ASSERT || DEBUG */
5740
5741
5742 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5743
5744 clock_get_uptime(&start);
5745
5746 if (!preflight) {
5747 hibernate_page_list_zero(page_list);
5748 hibernate_page_list_zero(page_list_wired);
5749 hibernate_page_list_zero(page_list_pal);
5750
5751 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5752 hibernate_stats.cd_pages = pages;
5753 }
5754
5755 if (vm_page_local_q) {
5756 for (i = 0; i < vm_page_local_q_count; i++)
5757 vm_page_reactivate_local(i, TRUE, !preflight);
5758 }
5759
5760 if (preflight) {
5761 vm_object_lock(compressor_object);
5762 vm_page_lock_queues();
5763 lck_mtx_lock(&vm_page_queue_free_lock);
5764 }
5765
5766 m = (vm_page_t) hibernate_gobble_queue;
5767 while (m)
5768 {
5769 pages--;
5770 count_wire--;
5771 if (!preflight) {
5772 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5773 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5774 }
5775 m = (vm_page_t) m->pageq.next;
5776 }
5777
5778 if (!preflight) for( i = 0; i < real_ncpus; i++ )
5779 {
5780 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5781 {
5782 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5783 {
5784 pages--;
5785 count_wire--;
5786 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5787 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5788
5789 hibernate_stats.cd_local_free++;
5790 hibernate_stats.cd_total_free++;
5791 }
5792 }
5793 }
5794
5795 for( i = 0; i < vm_colors; i++ )
5796 {
5797 queue_iterate(&vm_page_queue_free[i],
5798 m,
5799 vm_page_t,
5800 pageq)
5801 {
5802 pages--;
5803 count_wire--;
5804 if (!preflight) {
5805 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5806 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5807
5808 hibernate_stats.cd_total_free++;
5809 }
5810 }
5811 }
5812
5813 queue_iterate(&vm_lopage_queue_free,
5814 m,
5815 vm_page_t,
5816 pageq)
5817 {
5818 pages--;
5819 count_wire--;
5820 if (!preflight) {
5821 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5822 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5823
5824 hibernate_stats.cd_total_free++;
5825 }
5826 }
5827
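/*
 * Each of the following queue walks follows the same pattern: every page
 * starts out presumed wired (count_wire == pages), so finding it on a
 * pageable queue decrements count_wire and sets its bit in page_list_wired
 * ("no wired save needed").  The page is then either counted for saving
 * or, if the kIOHibernateModeDiscardClean* mode allows and
 * hibernate_consider_discard() agrees, marked in page_list as not needing
 * a save and, when discard_all is set, discarded on the spot.
 */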
5828 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5829 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
5830 {
5831 next = (vm_page_t) m->pageq.next;
5832 discard = FALSE;
5833 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5834 && hibernate_consider_discard(m, preflight))
5835 {
5836 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5837 count_discard_inactive++;
5838 discard = discard_all;
5839 }
5840 else
5841 count_throttled++;
5842 count_wire--;
5843 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5844
5845 if (discard) hibernate_discard_page(m);
5846 m = next;
5847 }
5848
5849 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5850 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5851 {
5852 next = (vm_page_t) m->pageq.next;
5853 discard = FALSE;
5854 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5855 && hibernate_consider_discard(m, preflight))
5856 {
5857 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5858 if (m->dirty)
5859 count_discard_purgeable++;
5860 else
5861 count_discard_inactive++;
5862 discard = discard_all;
5863 }
5864 else
5865 count_anonymous++;
5866 count_wire--;
5867 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5868 if (discard) hibernate_discard_page(m);
5869 m = next;
5870 }
5871
5872 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5873 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5874 {
5875 next = (vm_page_t) m->pageq.next;
5876 discard = FALSE;
5877 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5878 && hibernate_consider_discard(m, preflight))
5879 {
5880 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5881 if (m->dirty)
5882 count_discard_purgeable++;
5883 else
5884 count_discard_cleaned++;
5885 discard = discard_all;
5886 }
5887 else
5888 count_cleaned++;
5889 count_wire--;
5890 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5891 if (discard) hibernate_discard_page(m);
5892 m = next;
5893 }
5894
5895 m = (vm_page_t) queue_first(&vm_page_queue_active);
5896 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5897 {
5898 next = (vm_page_t) m->pageq.next;
5899 discard = FALSE;
5900 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5901 && hibernate_consider_discard(m, preflight))
5902 {
5903 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5904 if (m->dirty)
5905 count_discard_purgeable++;
5906 else
5907 count_discard_active++;
5908 discard = discard_all;
5909 }
5910 else
5911 count_active++;
5912 count_wire--;
5913 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5914 if (discard) hibernate_discard_page(m);
5915 m = next;
5916 }
5917
5918 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5919 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5920 {
5921 next = (vm_page_t) m->pageq.next;
5922 discard = FALSE;
5923 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5924 && hibernate_consider_discard(m, preflight))
5925 {
5926 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5927 if (m->dirty)
5928 count_discard_purgeable++;
5929 else
5930 count_discard_inactive++;
5931 discard = discard_all;
5932 }
5933 else
5934 count_inactive++;
5935 count_wire--;
5936 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5937 if (discard) hibernate_discard_page(m);
5938 m = next;
5939 }
5940
5941 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5942 {
5943 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5944 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5945 {
5946 next = (vm_page_t) m->pageq.next;
5947 discard = FALSE;
5948 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5949 && hibernate_consider_discard(m, preflight))
5950 {
5951 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5952 count_discard_speculative++;
5953 discard = discard_all;
5954 }
5955 else
5956 count_speculative++;
5957 count_wire--;
5958 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5959 if (discard) hibernate_discard_page(m);
5960 m = next;
5961 }
5962 }
5963
5964 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
5965 {
5966 count_compressor++;
5967 count_wire--;
5968 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5969 }
5970
5971 if (preflight == FALSE && discard_all == TRUE) {
5972 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5973
5974 HIBLOG("hibernate_teardown started\n");
5975 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
5976 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
5977
5978 pages -= count_discard_vm_struct_pages;
5979 count_wire -= count_discard_vm_struct_pages;
5980
5981 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
5982
5983 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
5984 }
5985
5986 if (!preflight) {
5987 // pull wired from the pageable bitmap: pages still needing a wired save (bit clear in page_list_wired) are marked as not needing a pageable save
5988 bitmap = &page_list->bank_bitmap[0];
5989 bitmap_wired = &page_list_wired->bank_bitmap[0];
5990 for (bank = 0; bank < page_list->bank_count; bank++)
5991 {
5992 for (i = 0; i < bitmap->bitmapwords; i++)
5993 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5994 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5995 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5996 }
5997 }
5998
5999 // machine dependent adjustments
6000 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
6001
6002 if (!preflight) {
6003 hibernate_stats.cd_count_wire = count_wire;
6004 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
6005 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
6006 }
6007
6008 clock_get_uptime(&end);
6009 absolutetime_to_nanoseconds(end - start, &nsec);
6010 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
6011
6012 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
6013 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
6014 discard_all ? "did" : "could",
6015 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6016
6017 if (hibernate_stats.cd_skipped_xpmapped)
6018 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
6019
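/*
 * pagesOut is the caller's estimate of how many pages the image will
 * contain: the pages still presumed saved, minus those counted above as
 * discardable.  When this is a preflight for a hibernation that will
 * discard, the pages counted on the pageable queues and in the compressor
 * are subtracted as well.
 */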
6020 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
6021
6022 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
6023
6024 #if MACH_ASSERT || DEBUG
6025 if (!preflight)
6026 {
6027 if (vm_page_local_q) {
6028 for (i = 0; i < vm_page_local_q_count; i++) {
6029 struct vpl *lq;
6030 lq = &vm_page_local_q[i].vpl_un.vpl;
6031 VPL_UNLOCK(&lq->vpl_lock);
6032 }
6033 }
6034 vm_page_unlock_queues();
6035 }
6036 #endif /* MACH_ASSERT || DEBUG */
6037
6038 if (preflight) {
6039 lck_mtx_unlock(&vm_page_queue_free_lock);
6040 vm_page_unlock_queues();
6041 vm_object_unlock(compressor_object);
6042 }
6043
6044 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
6045 }
6046
6047 void
6048 hibernate_page_list_discard(hibernate_page_list_t * page_list)
6049 {
6050 uint64_t start, end, nsec;
6051 vm_page_t m;
6052 vm_page_t next;
6053 uint32_t i;
6054 uint32_t count_discard_active = 0;
6055 uint32_t count_discard_inactive = 0;
6056 uint32_t count_discard_purgeable = 0;
6057 uint32_t count_discard_cleaned = 0;
6058 uint32_t count_discard_speculative = 0;
6059
6060
6061 #if MACH_ASSERT || DEBUG
6062 vm_page_lock_queues();
6063 if (vm_page_local_q) {
6064 for (i = 0; i < vm_page_local_q_count; i++) {
6065 struct vpl *lq;
6066 lq = &vm_page_local_q[i].vpl_un.vpl;
6067 VPL_LOCK(&lq->vpl_lock);
6068 }
6069 }
6070 #endif /* MACH_ASSERT || DEBUG */
6071
6072 clock_get_uptime(&start);
6073
6074 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
6075 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
6076 {
6077 next = (vm_page_t) m->pageq.next;
6078 if (hibernate_page_bittst(page_list, m->phys_page))
6079 {
6080 if (m->dirty)
6081 count_discard_purgeable++;
6082 else
6083 count_discard_inactive++;
6084 hibernate_discard_page(m);
6085 }
6086 m = next;
6087 }
6088
6089 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6090 {
6091 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
6092 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
6093 {
6094 next = (vm_page_t) m->pageq.next;
6095 if (hibernate_page_bittst(page_list, m->phys_page))
6096 {
6097 count_discard_speculative++;
6098 hibernate_discard_page(m);
6099 }
6100 m = next;
6101 }
6102 }
6103
6104 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6105 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
6106 {
6107 next = (vm_page_t) m->pageq.next;
6108 if (hibernate_page_bittst(page_list, m->phys_page))
6109 {
6110 if (m->dirty)
6111 count_discard_purgeable++;
6112 else
6113 count_discard_inactive++;
6114 hibernate_discard_page(m);
6115 }
6116 m = next;
6117 }
6118
6119 m = (vm_page_t) queue_first(&vm_page_queue_active);
6120 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
6121 {
6122 next = (vm_page_t) m->pageq.next;
6123 if (hibernate_page_bittst(page_list, m->phys_page))
6124 {
6125 if (m->dirty)
6126 count_discard_purgeable++;
6127 else
6128 count_discard_active++;
6129 hibernate_discard_page(m);
6130 }
6131 m = next;
6132 }
6133
6134 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
6135 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
6136 {
6137 next = (vm_page_t) m->pageq.next;
6138 if (hibernate_page_bittst(page_list, m->phys_page))
6139 {
6140 if (m->dirty)
6141 count_discard_purgeable++;
6142 else
6143 count_discard_cleaned++;
6144 hibernate_discard_page(m);
6145 }
6146 m = next;
6147 }
6148
6149 #if MACH_ASSERT || DEBUG
6150 if (vm_page_local_q) {
6151 for (i = 0; i < vm_page_local_q_count; i++) {
6152 struct vpl *lq;
6153 lq = &vm_page_local_q[i].vpl_un.vpl;
6154 VPL_UNLOCK(&lq->vpl_lock);
6155 }
6156 }
6157 vm_page_unlock_queues();
6158 #endif /* MACH_ASSERT || DEBUG */
6159
6160 clock_get_uptime(&end);
6161 absolutetime_to_nanoseconds(end - start, &nsec);
6162 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
6163 nsec / 1000000ULL,
6164 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6165 }
6166
6167 boolean_t hibernate_paddr_map_inited = FALSE;
6168 boolean_t hibernate_rebuild_needed = FALSE;
6169 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
6170 vm_page_t hibernate_rebuild_hash_list = NULL;
6171
6172 unsigned int hibernate_teardown_found_tabled_pages = 0;
6173 unsigned int hibernate_teardown_found_created_pages = 0;
6174 unsigned int hibernate_teardown_found_free_pages = 0;
6175 unsigned int hibernate_teardown_vm_page_free_count;
6176
6177
6178 struct ppnum_mapping {
6179 struct ppnum_mapping *ppnm_next;
6180 ppnum_t ppnm_base_paddr;
6181 unsigned int ppnm_sindx;
6182 unsigned int ppnm_eindx;
6183 };
6184
6185 struct ppnum_mapping *ppnm_head;
6186 struct ppnum_mapping *ppnm_last_found = NULL;
6187
6188
6189 void
6190 hibernate_create_paddr_map()
6191 {
6192 unsigned int i;
6193 ppnum_t next_ppnum_in_run = 0;
6194 struct ppnum_mapping *ppnm = NULL;
6195
6196 if (hibernate_paddr_map_inited == FALSE) {
6197
6198 for (i = 0; i < vm_pages_count; i++) {
6199
6200 if (ppnm)
6201 ppnm->ppnm_eindx = i;
6202
6203 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
6204
6205 ppnm = kalloc(sizeof(struct ppnum_mapping));
6206
6207 ppnm->ppnm_next = ppnm_head;
6208 ppnm_head = ppnm;
6209
6210 ppnm->ppnm_sindx = i;
6211 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
6212 }
6213 next_ppnum_in_run = vm_pages[i].phys_page + 1;
6214 }
6215 ppnm->ppnm_eindx++;
6216
6217 hibernate_paddr_map_inited = TRUE;
6218 }
6219 }
6220
6221 ppnum_t
6222 hibernate_lookup_paddr(unsigned int indx)
6223 {
6224 struct ppnum_mapping *ppnm = NULL;
6225
6226 ppnm = ppnm_last_found;
6227
6228 if (ppnm) {
6229 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
6230 goto done;
6231 }
6232 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
6233
6234 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
6235 ppnm_last_found = ppnm;
6236 break;
6237 }
6238 }
6239 if (ppnm == NULL)
6240 panic("hibernate_lookup_paddr of %d failed\n", indx);
6241 done:
6242 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
6243 }
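/*
 * Worked example (hypothetical numbers): suppose vm_pages[] covers two
 * physically contiguous runs, entries 0..9 at ppnum 0x100..0x109 and
 * entries 10..19 at ppnum 0x300..0x309.  hibernate_create_paddr_map()
 * builds one ppnum_mapping per run, so hibernate_lookup_paddr(12) matches
 * the run with sindx 10 / eindx 20 / base 0x300 and returns
 * 0x300 + (12 - 10) == 0x302.
 */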
6244
6245
6246 uint32_t
6247 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6248 {
6249 addr64_t saddr_aligned;
6250 addr64_t eaddr_aligned;
6251 addr64_t addr;
6252 ppnum_t paddr;
6253 unsigned int mark_as_unneeded_pages = 0;
6254
6255 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
6256 eaddr_aligned = eaddr & ~PAGE_MASK_64;
6257
6258 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
6259
6260 paddr = pmap_find_phys(kernel_pmap, addr);
6261
6262 assert(paddr);
6263
6264 hibernate_page_bitset(page_list, TRUE, paddr);
6265 hibernate_page_bitset(page_list_wired, TRUE, paddr);
6266
6267 mark_as_unneeded_pages++;
6268 }
6269 return (mark_as_unneeded_pages);
6270 }
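/*
 * The range is shrunk inward to whole pages before marking.  With 4KB
 * pages (PAGE_MASK_64 == 0xFFF; example addresses are hypothetical),
 * saddr 0x11234 rounds up to 0x12000 and eaddr 0x15234 rounds down to
 * 0x15000, so the three pages at 0x12000, 0x13000 and 0x14000 are marked
 * unneeded while the partial pages at either end are left alone.
 */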
6271
6272
6273 void
6274 hibernate_hash_insert_page(vm_page_t mem)
6275 {
6276 vm_page_bucket_t *bucket;
6277 int hash_id;
6278
6279 assert(mem->hashed);
6280 assert(mem->object);
6281 assert(mem->offset != (vm_object_offset_t) -1);
6282
6283 /*
6284 * Insert it into the object/offset hash table
6285 */
6286 hash_id = vm_page_hash(mem->object, mem->offset);
6287 bucket = &vm_page_buckets[hash_id];
6288
6289 mem->next_m = bucket->page_list;
6290 bucket->page_list = VM_PAGE_PACK_PTR(mem);
6291 }
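/*
 * Illustrative sketch, not from the original source: the inverse of the
 * insert above, walking a bucket the same way vm_page_info() does.  The
 * helper name is hypothetical; during hibernate teardown/rebuild this code
 * runs single threaded, which is why no bucket lock is shown.
 */
#if 0 /* example only */
static vm_page_t
hibernate_example_hash_lookup(vm_object_t object, vm_object_offset_t offset)
{
	vm_page_bucket_t *bucket = &vm_page_buckets[vm_page_hash(object, offset)];
	vm_page_t m;

	for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL;
	     m = VM_PAGE_UNPACK_PTR(m->next_m)) {
		if (m->object == object && m->offset == offset)
			break;
	}
	return (m);
}
#endif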
6292
6293
6294 void
6295 hibernate_free_range(int sindx, int eindx)
6296 {
6297 vm_page_t mem;
6298 unsigned int color;
6299
6300 while (sindx < eindx) {
6301 mem = &vm_pages[sindx];
6302
6303 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6304
6305 mem->lopage = FALSE;
6306 mem->free = TRUE;
6307
6308 color = mem->phys_page & vm_color_mask;
6309 queue_enter_first(&vm_page_queue_free[color],
6310 mem,
6311 vm_page_t,
6312 pageq);
6313 vm_page_free_count++;
6314
6315 sindx++;
6316 }
6317 }
6318
6319
6320 extern void hibernate_rebuild_pmap_structs(void);
6321
6322 void
6323 hibernate_rebuild_vm_structs(void)
6324 {
6325 int cindx, sindx, eindx;
6326 vm_page_t mem, tmem, mem_next;
6327 AbsoluteTime startTime, endTime;
6328 uint64_t nsec;
6329
6330 if (hibernate_rebuild_needed == FALSE)
6331 return;
6332
6333 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6334 HIBLOG("hibernate_rebuild started\n");
6335
6336 clock_get_uptime(&startTime);
6337
6338 hibernate_rebuild_pmap_structs();
6339
6340 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6341 eindx = vm_pages_count;
6342
6343 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6344
6345 mem = &vm_pages[cindx];
6346 /*
6347 * hibernate_teardown_vm_structs leaves the location where
6348 * this vm_page_t must be restored to in "next_m".
6349 */
6350 tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
6351 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6352
6353 sindx = (int)(tmem - &vm_pages[0]);
6354
6355 if (mem != tmem) {
6356 /*
6357 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6358 * so move it back to its real location
6359 */
6360 *tmem = *mem;
6361 mem = tmem;
6362 }
6363 if (mem->hashed)
6364 hibernate_hash_insert_page(mem);
6365 /*
6366 * the 'hole' between this vm_page_t and the previous
6367 * vm_page_t we moved needs to be initialized as
6368 * a range of free vm_page_t's
6369 */
6370 hibernate_free_range(sindx + 1, eindx);
6371
6372 eindx = sindx;
6373 }
6374 if (sindx)
6375 hibernate_free_range(0, sindx);
6376
6377 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6378
6379 /*
6380 * process the list of vm_page_t's that were entered in the hash,
6381 * but were not located in the vm_pages array... these are
6382 * vm_page_t's that were created on the fly (i.e. fictitious)
6383 */
6384 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6385 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6386
6387 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6388 hibernate_hash_insert_page(mem);
6389 }
6390 hibernate_rebuild_hash_list = NULL;
6391
6392 clock_get_uptime(&endTime);
6393 SUB_ABSOLUTETIME(&endTime, &startTime);
6394 absolutetime_to_nanoseconds(endTime, &nsec);
6395
6396 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6397
6398 hibernate_rebuild_needed = FALSE;
6399
6400 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6401 }
6402
6403
6404 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6405
6406 uint32_t
6407 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6408 {
6409 unsigned int i;
6410 unsigned int compact_target_indx;
6411 vm_page_t mem, mem_next;
6412 vm_page_bucket_t *bucket;
6413 unsigned int mark_as_unneeded_pages = 0;
6414 unsigned int unneeded_vm_page_bucket_pages = 0;
6415 unsigned int unneeded_vm_pages_pages = 0;
6416 unsigned int unneeded_pmap_pages = 0;
6417 addr64_t start_of_unneeded = 0;
6418 addr64_t end_of_unneeded = 0;
6419
6420
6421 if (hibernate_should_abort())
6422 return (0);
6423
6424 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6425 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6426 vm_page_cleaned_count, compressor_object->resident_page_count);
6427
6428 for (i = 0; i < vm_page_bucket_count; i++) {
6429
6430 bucket = &vm_page_buckets[i];
6431
6432 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
6433 assert(mem->hashed);
6434
6435 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6436
6437 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6438 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
6439 hibernate_rebuild_hash_list = mem;
6440 }
6441 }
6442 }
6443 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6444 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6445
6446 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6447
6448 compact_target_indx = 0;
6449
6450 for (i = 0; i < vm_pages_count; i++) {
6451
6452 mem = &vm_pages[i];
6453
6454 if (mem->free) {
6455 unsigned int color;
6456
6457 assert(mem->busy);
6458 assert(!mem->lopage);
6459
6460 color = mem->phys_page & vm_color_mask;
6461
6462 queue_remove(&vm_page_queue_free[color],
6463 mem,
6464 vm_page_t,
6465 pageq);
6466 mem->pageq.next = NULL;
6467 mem->pageq.prev = NULL;
6468
6469 vm_page_free_count--;
6470
6471 hibernate_teardown_found_free_pages++;
6472
6473 if ( !vm_pages[compact_target_indx].free)
6474 compact_target_indx = i;
6475 } else {
6476 /*
6477 * record this vm_page_t's original location;
6478 * we need this even if it doesn't get moved,
6479 * so the rebuild function knows it does not
6480 * have to be moved back
6481 */
6482 mem->next_m = VM_PAGE_PACK_PTR(mem);
6483
6484 if (vm_pages[compact_target_indx].free) {
6485 /*
6486 * we've got a hole to fill, so
6487 * move this vm_page_t to its new home
6488 */
6489 vm_pages[compact_target_indx] = *mem;
6490 mem->free = TRUE;
6491
6492 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6493 compact_target_indx++;
6494 } else
6495 hibernate_teardown_last_valid_compact_indx = i;
6496 }
6497 }
6498 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6499 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6500 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6501
6502 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6503
6504 if (start_of_unneeded) {
6505 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6506 mark_as_unneeded_pages += unneeded_pmap_pages;
6507 }
6508 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6509
6510 hibernate_rebuild_needed = TRUE;
6511
6512 return (mark_as_unneeded_pages);
6513 }
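/*
 * Worked example (hypothetical layout): if vm_pages[] is
 * [used, free, used, free, free, used], the compaction loop above removes
 * the free entries from their free queues and slides the later used
 * entries down into the holes, giving [used, used, used, free, free, free]
 * with hibernate_teardown_last_valid_compact_indx == 2.  Each moved entry
 * keeps its original index packed in next_m, which is exactly what
 * hibernate_rebuild_vm_structs() uses to put it back and to re-create the
 * freed ranges afterwards.
 */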
6514
6515
6516 #endif /* HIBERNATION */
6517
6518 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6519
6520 #include <mach_vm_debug.h>
6521 #if MACH_VM_DEBUG
6522
6523 #include <mach_debug/hash_info.h>
6524 #include <vm/vm_debug.h>
6525
6526 /*
6527 * Routine: vm_page_info
6528 * Purpose:
6529 * Return information about the global VP table.
6530 * Fills the buffer with as much information as possible
6531 * and returns the desired size of the buffer.
6532 * Conditions:
6533 * Nothing locked. The caller should provide
6534 * possibly-pageable memory.
6535 */
6536
6537 unsigned int
6538 vm_page_info(
6539 hash_info_bucket_t *info,
6540 unsigned int count)
6541 {
6542 unsigned int i;
6543 lck_spin_t *bucket_lock;
6544
6545 if (vm_page_bucket_count < count)
6546 count = vm_page_bucket_count;
6547
6548 for (i = 0; i < count; i++) {
6549 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6550 unsigned int bucket_count = 0;
6551 vm_page_t m;
6552
6553 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6554 lck_spin_lock(bucket_lock);
6555
6556 for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
6557 bucket_count++;
6558
6559 lck_spin_unlock(bucket_lock);
6560
6561 /* don't touch pageable memory while holding locks */
6562 info[i].hib_count = bucket_count;
6563 }
6564
6565 return vm_page_bucket_count;
6566 }
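/*
 * Illustrative usage sketch, not from the original source: a debug-only
 * caller sizing and then filling the table.  vm_page_info_example is a
 * hypothetical name; kalloc()/kfree() are the allocators used elsewhere
 * in this file.
 */
#if 0 /* example only */
static void
vm_page_info_example(void)
{
	unsigned int desired, i, populated = 0;
	hash_info_bucket_t *info;

	desired = vm_page_info(NULL, 0);	/* with count 0, just returns the table size */
	info = kalloc(desired * sizeof (*info));
	(void) vm_page_info(info, desired);	/* fill in one hib_count per bucket */

	for (i = 0; i < desired; i++)
		if (info[i].hib_count != 0)
			populated++;

	printf("vm_page_info: %u of %u buckets in use\n", populated, desired);
	kfree(info, desired * sizeof (*info));
}
#endif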
6567 #endif /* MACH_VM_DEBUG */
6568
6569 #if VM_PAGE_BUCKETS_CHECK
6570 void
6571 vm_page_buckets_check(void)
6572 {
6573 unsigned int i;
6574 vm_page_t p;
6575 unsigned int p_hash;
6576 vm_page_bucket_t *bucket;
6577 lck_spin_t *bucket_lock;
6578
6579 if (!vm_page_buckets_check_ready) {
6580 return;
6581 }
6582
6583 #if HIBERNATION
6584 if (hibernate_rebuild_needed ||
6585 hibernate_rebuild_hash_list) {
6586 panic("BUCKET_CHECK: hibernation in progress: "
6587 "rebuild_needed=%d rebuild_hash_list=%p\n",
6588 hibernate_rebuild_needed,
6589 hibernate_rebuild_hash_list);
6590 }
6591 #endif /* HIBERNATION */
6592
6593 #if VM_PAGE_FAKE_BUCKETS
6594 char *cp;
6595 for (cp = (char *) vm_page_fake_buckets_start;
6596 cp < (char *) vm_page_fake_buckets_end;
6597 cp++) {
6598 if (*cp != 0x5a) {
6599 panic("BUCKET_CHECK: corruption at %p in fake buckets "
6600 "[0x%llx:0x%llx]\n",
6601 cp,
6602 (uint64_t) vm_page_fake_buckets_start,
6603 (uint64_t) vm_page_fake_buckets_end);
6604 }
6605 }
6606 #endif /* VM_PAGE_FAKE_BUCKETS */
6607
6608 for (i = 0; i < vm_page_bucket_count; i++) {
6609 bucket = &vm_page_buckets[i];
6610 if (!bucket->page_list) {
6611 continue;
6612 }
6613
6614 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6615 lck_spin_lock(bucket_lock);
6616 p = VM_PAGE_UNPACK_PTR(bucket->page_list);
6617 while (p != VM_PAGE_NULL) {
6618 if (!p->hashed) {
6619 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
6620 "hash %d in bucket %d at %p "
6621 "is not hashed\n",
6622 p, p->object, p->offset,
6623 p_hash, i, bucket);
6624 }
6625 p_hash = vm_page_hash(p->object, p->offset);
6626 if (p_hash != i) {
6627 panic("BUCKET_CHECK: corruption in bucket %d "
6628 "at %p: page %p object %p offset 0x%llx "
6629 "hash %d\n",
6630 i, bucket, p, p->object, p->offset,
6631 p_hash);
6632 }
6633 p = VM_PAGE_UNPACK_PTR(p->next_m);
6634 }
6635 lck_spin_unlock(bucket_lock);
6636 }
6637
6638 // printf("BUCKET_CHECK: checked buckets\n");
6639 }
6640 #endif /* VM_PAGE_BUCKETS_CHECK */