[apple/xnu.git] / osfmk / vm / vm_resident.c (513877c18947d4448c8670667d8608c6e64b8be4)
1 /*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_page.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 *
62 * Resident memory management module.
63 */
64
65 #include <debug.h>
66 #include <libkern/OSAtomic.h>
67
68 #include <mach/clock_types.h>
69 #include <mach/vm_prot.h>
70 #include <mach/vm_statistics.h>
71 #include <mach/sdt.h>
72 #include <kern/counters.h>
73 #include <kern/sched_prim.h>
74 #include <kern/task.h>
75 #include <kern/thread.h>
76 #include <kern/kalloc.h>
77 #include <kern/zalloc.h>
78 #include <kern/xpr.h>
79 #include <kern/ledger.h>
80 #include <vm/pmap.h>
81 #include <vm/vm_init.h>
82 #include <vm/vm_map.h>
83 #include <vm/vm_page.h>
84 #include <vm/vm_pageout.h>
85 #include <vm/vm_kern.h> /* kernel_memory_allocate() */
86 #include <kern/misc_protos.h>
87 #include <zone_debug.h>
88 #include <vm/cpm.h>
89 #include <pexpert/pexpert.h>
90
91 #include <vm/vm_protos.h>
92 #include <vm/memory_object.h>
93 #include <vm/vm_purgeable_internal.h>
94 #include <vm/vm_compressor.h>
95
96 #if CONFIG_PHANTOM_CACHE
97 #include <vm/vm_phantom_cache.h>
98 #endif
99
100 #include <IOKit/IOHibernatePrivate.h>
101
102 #include <sys/kdebug.h>
103
104 boolean_t hibernate_cleaning_in_progress = FALSE;
105 boolean_t vm_page_free_verify = TRUE;
106
107 uint32_t vm_lopage_free_count = 0;
108 uint32_t vm_lopage_free_limit = 0;
109 uint32_t vm_lopage_lowater = 0;
110 boolean_t vm_lopage_refill = FALSE;
111 boolean_t vm_lopage_needed = FALSE;
112
113 lck_mtx_ext_t vm_page_queue_lock_ext;
114 lck_mtx_ext_t vm_page_queue_free_lock_ext;
115 lck_mtx_ext_t vm_purgeable_queue_lock_ext;
116
117 int speculative_age_index = 0;
118 int speculative_steal_index = 0;
119 struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];
120
121
122 __private_extern__ void vm_page_init_lck_grp(void);
123
124 static void vm_page_free_prepare(vm_page_t page);
125 static vm_page_t vm_page_grab_fictitious_common(ppnum_t phys_addr);
126
127
128
129
130 /*
131 * Associated with each page of user-allocatable memory
132 * is a page structure.
133 */
134
135 /*
136 * These variables record the values returned by vm_page_bootstrap,
137 * for debugging purposes. The implementation of pmap_steal_memory
138 * and pmap_startup here also uses them internally.
139 */
140
141 vm_offset_t virtual_space_start;
142 vm_offset_t virtual_space_end;
143 uint32_t vm_page_pages;
144
145 /*
146 * The vm_page_lookup() routine, which provides for fast
147 * (virtual memory object, offset) to page lookup, employs
148 * the following hash table. The vm_page_{insert,remove}
149 * routines install and remove associations in the table.
150 * [This table is often called the virtual-to-physical,
151 * or VP, table.]
152 */
153 typedef struct {
154 vm_page_packed_t page_list;
155 #if MACH_PAGE_HASH_STATS
156 int cur_count; /* current count */
157 int hi_count; /* high water mark */
158 #endif /* MACH_PAGE_HASH_STATS */
159 } vm_page_bucket_t;
160
161
162 #define BUCKETS_PER_LOCK 16
163
164 vm_page_bucket_t *vm_page_buckets; /* Array of buckets */
165 unsigned int vm_page_bucket_count = 0; /* How big is array? */
166 unsigned int vm_page_hash_mask; /* Mask for hash function */
167 unsigned int vm_page_hash_shift; /* Shift for hash function */
168 uint32_t vm_page_bucket_hash; /* Basic bucket hash */
169 unsigned int vm_page_bucket_lock_count = 0; /* How big is array of locks? */
170
171 lck_spin_t *vm_page_bucket_locks;
172
173 #if VM_PAGE_BUCKETS_CHECK
174 boolean_t vm_page_buckets_check_ready = FALSE;
175 #if VM_PAGE_FAKE_BUCKETS
176 vm_page_bucket_t *vm_page_fake_buckets; /* decoy buckets */
177 vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
178 #endif /* VM_PAGE_FAKE_BUCKETS */
179 #endif /* VM_PAGE_BUCKETS_CHECK */
180
181 #if MACH_PAGE_HASH_STATS
182 /* This routine is for debugging only. It is intended to be called by
183 * hand by a developer using a kernel debugger. This routine prints
184 * out vm_page_hash table statistics to the kernel debug console.
185 */
186 void
187 hash_debug(void)
188 {
189 int i;
190 int numbuckets = 0;
191 int highsum = 0;
192 int maxdepth = 0;
193
194 for (i = 0; i < vm_page_bucket_count; i++) {
195 if (vm_page_buckets[i].hi_count) {
196 numbuckets++;
197 highsum += vm_page_buckets[i].hi_count;
198 if (vm_page_buckets[i].hi_count > maxdepth)
199 maxdepth = vm_page_buckets[i].hi_count;
200 }
201 }
202 printf("Total number of buckets: %d\n", vm_page_bucket_count);
203 printf("Number used buckets: %d = %d%%\n",
204 numbuckets, 100*numbuckets/vm_page_bucket_count);
205 printf("Number unused buckets: %d = %d%%\n",
206 vm_page_bucket_count - numbuckets,
207 100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
208 printf("Sum of bucket max depth: %d\n", highsum);
209 printf("Average bucket depth: %d.%2d\n",
210 highsum/vm_page_bucket_count,
211 highsum%vm_page_bucket_count);
212 printf("Maximum bucket depth: %d\n", maxdepth);
213 }
214 #endif /* MACH_PAGE_HASH_STATS */
215
216 /*
217 * The virtual page size is currently implemented as a runtime
218 * variable, but is constant once initialized using vm_set_page_size.
219 * This initialization must be done in the machine-dependent
220 * bootstrap sequence, before calling other machine-independent
221 * initializations.
222 *
223 * All references to the virtual page size outside this
224 * module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
225 * constants.
226 */
227 vm_size_t page_size = PAGE_SIZE;
228 vm_size_t page_mask = PAGE_MASK;
229 int page_shift = PAGE_SHIFT;
230
231 /*
232 * Resident page structures are initialized from
233 * a template (see vm_page_alloc).
234 *
235 * When adding a new field to the virtual memory
236 * object structure, be sure to add initialization
237 * (see vm_page_bootstrap).
238 */
239 struct vm_page vm_page_template;
240
241 vm_page_t vm_pages = VM_PAGE_NULL;
242 unsigned int vm_pages_count = 0;
243 ppnum_t vm_page_lowest = 0;
244
245 /*
246 * Resident pages that represent real memory
247 * are allocated from a set of free lists,
248 * one per color.
249 */
250 unsigned int vm_colors;
251 unsigned int vm_color_mask; /* mask is == (vm_colors-1) */
252 unsigned int vm_cache_geometry_colors = 0; /* set by hw dependent code during startup */
253 unsigned int vm_free_magazine_refill_limit = 0;
254 queue_head_t vm_page_queue_free[MAX_COLORS];
255 unsigned int vm_page_free_wanted;
256 unsigned int vm_page_free_wanted_privileged;
257 unsigned int vm_page_free_count;
258 unsigned int vm_page_fictitious_count;
259
260 /*
261 * Occasionally, the virtual memory system uses
262 * resident page structures that do not refer to
263 * real pages, for example to leave a page with
264 * important state information in the VP table.
265 *
266 * These page structures are allocated the way
267 * most other kernel structures are.
268 */
269 zone_t vm_page_zone;
270 vm_locks_array_t vm_page_locks;
271 decl_lck_mtx_data(,vm_page_alloc_lock)
272 lck_mtx_ext_t vm_page_alloc_lock_ext;
273
274 unsigned int io_throttle_zero_fill;
275
276 unsigned int vm_page_local_q_count = 0;
277 unsigned int vm_page_local_q_soft_limit = 250;
278 unsigned int vm_page_local_q_hard_limit = 500;
279 struct vplq *vm_page_local_q = NULL;
280
281 /* N.B. Guard and fictitious pages must not
282 * be assigned a zero phys_page value.
283 */
284 /*
285 * Fictitious pages don't have a physical address,
286 * but we must initialize phys_page to something.
287 * For debugging, this should be a strange value
288 * that the pmap module can recognize in assertions.
289 */
290 ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;
291
292 /*
293 * Guard pages are not accessible so they don't
294 * need a physical address, but we need to enter
295 * one in the pmap.
296 * Let's make it recognizable and make sure that
297 * we don't use a real physical page with that
298 * physical address.
299 */
300 ppnum_t vm_page_guard_addr = (ppnum_t) -2;
301
302 /*
303 * Resident page structures are also chained on
304 * queues that are used by the page replacement
305 * system (pageout daemon). These queues are
306 * defined here, but are shared by the pageout
307 * module. The inactive queue is broken into
308 * file-backed and anonymous queues for convenience, as the
309 * pageout daemon often assigns a higher
310 * importance to anonymous pages (it is less likely to pick them).
311 */
312 queue_head_t vm_page_queue_active;
313 queue_head_t vm_page_queue_inactive;
314 queue_head_t vm_page_queue_anonymous; /* inactive memory queue for anonymous pages */
315 queue_head_t vm_page_queue_throttled;
316
317 unsigned int vm_page_active_count;
318 unsigned int vm_page_inactive_count;
319 unsigned int vm_page_anonymous_count;
320 unsigned int vm_page_throttled_count;
321 unsigned int vm_page_speculative_count;
322 unsigned int vm_page_wire_count;
323 unsigned int vm_page_wire_count_initial;
324 unsigned int vm_page_gobble_count = 0;
325
326 #define VM_PAGE_WIRE_COUNT_WARNING 0
327 #define VM_PAGE_GOBBLE_COUNT_WARNING 0
328
329 unsigned int vm_page_purgeable_count = 0; /* # of pages purgeable now */
330 unsigned int vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
331 uint64_t vm_page_purged_count = 0; /* total count of purged pages */
332
333 unsigned int vm_page_xpmapped_external_count = 0;
334 unsigned int vm_page_external_count = 0;
335 unsigned int vm_page_internal_count = 0;
336 unsigned int vm_page_pageable_external_count = 0;
337 unsigned int vm_page_pageable_internal_count = 0;
338
339 #if DEVELOPMENT || DEBUG
340 unsigned int vm_page_speculative_recreated = 0;
341 unsigned int vm_page_speculative_created = 0;
342 unsigned int vm_page_speculative_used = 0;
343 #endif
344
345 queue_head_t vm_page_queue_cleaned;
346
347 unsigned int vm_page_cleaned_count = 0;
348 unsigned int vm_pageout_enqueued_cleaned = 0;
349
350 uint64_t max_valid_dma_address = 0xffffffffffffffffULL;
351 ppnum_t max_valid_low_ppnum = 0xffffffff;
352
353
354 /*
355 * Several page replacement parameters are also
356 * shared with this module, so that page allocation
357 * (done here in vm_page_alloc) can trigger the
358 * pageout daemon.
359 */
360 unsigned int vm_page_free_target = 0;
361 unsigned int vm_page_free_min = 0;
362 unsigned int vm_page_throttle_limit = 0;
363 uint32_t vm_page_creation_throttle = 0;
364 unsigned int vm_page_inactive_target = 0;
365 unsigned int vm_page_anonymous_min = 0;
366 unsigned int vm_page_inactive_min = 0;
367 unsigned int vm_page_free_reserved = 0;
368 unsigned int vm_page_throttle_count = 0;
369
370
371 /*
372 * The VM system has a couple of heuristics for deciding
373 * that pages are "uninteresting" and should be placed
374 * on the inactive queue as likely candidates for replacement.
375 * These variables let the heuristics be controlled at run-time
376 * to make experimentation easier.
377 */
378
379 boolean_t vm_page_deactivate_hint = TRUE;
380
381 struct vm_page_stats_reusable vm_page_stats_reusable;
382
383 /*
384 * vm_set_page_size:
385 *
386 * Sets the page size, perhaps based upon the memory
387 * size. Must be called before any use of page-size
388 * dependent functions.
389 *
390 * Sets page_shift and page_mask from page_size.
391 */
392 void
393 vm_set_page_size(void)
394 {
395 page_size = PAGE_SIZE;
396 page_mask = PAGE_MASK;
397 page_shift = PAGE_SHIFT;
398
399 if ((page_mask & page_size) != 0)
400 panic("vm_set_page_size: page size not a power of two");
401
402 for (page_shift = 0; ; page_shift++)
403 if ((1U << page_shift) == page_size)
404 break;
405 }
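/*
 * Worked example: on a platform where PAGE_SIZE is 4096 bytes, the
 * loop above stops with page_shift == 12 (since (1U << 12) == 4096)
 * and page_mask == 0xFFF.  The exact values are platform-dependent.
 */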
406
407 #define COLOR_GROUPS_TO_STEAL 4
408
409
410 /* Called once during startup, after the cache geometry is known.
411 */
412 static void
413 vm_page_set_colors( void )
414 {
415 unsigned int n, override;
416
417 if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
418 n = override;
419 else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
420 n = vm_cache_geometry_colors;
421 else n = DEFAULT_COLORS; /* use default if all else fails */
422
423 if ( n == 0 )
424 n = 1;
425 if ( n > MAX_COLORS )
426 n = MAX_COLORS;
427
428 /* the count must be a power of 2 */
429 if ( ( n & (n - 1)) != 0 )
430 panic("vm_page_set_colors");
431
432 vm_colors = n;
433 vm_color_mask = n - 1;
434
435 vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
436 }
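/*
 * A minimal sketch of how these values are typically consumed (shown
 * only for illustration; the assumption is that the grab/release paths
 * later in this file pick a free queue in this manner):
 *
 *	unsigned int color = mem->phys_page & vm_color_mask;
 *	queue_enter_first(&vm_page_queue_free[color], mem, vm_page_t, pageq);
 *
 * Keeping vm_colors a power of two lets the mask stand in for a modulo.
 */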
437
438
439 lck_grp_t vm_page_lck_grp_free;
440 lck_grp_t vm_page_lck_grp_queue;
441 lck_grp_t vm_page_lck_grp_local;
442 lck_grp_t vm_page_lck_grp_purge;
443 lck_grp_t vm_page_lck_grp_alloc;
444 lck_grp_t vm_page_lck_grp_bucket;
445 lck_grp_attr_t vm_page_lck_grp_attr;
446 lck_attr_t vm_page_lck_attr;
447
448
449 __private_extern__ void
450 vm_page_init_lck_grp(void)
451 {
452 /*
453 * initialize the vm_page lock world
454 */
455 lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
456 lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
457 lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
458 lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
459 lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
460 lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
461 lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
462 lck_attr_setdefault(&vm_page_lck_attr);
463 lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);
464
465 vm_compressor_init_locks();
466 }
467
468 void
469 vm_page_init_local_q()
470 {
471 unsigned int num_cpus;
472 unsigned int i;
473 struct vplq *t_local_q;
474
475 num_cpus = ml_get_max_cpus();
476
477 /*
478 * no point in this for a uni-processor system
479 */
480 if (num_cpus >= 2) {
481 t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));
482
483 for (i = 0; i < num_cpus; i++) {
484 struct vpl *lq;
485
486 lq = &t_local_q[i].vpl_un.vpl;
487 VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
488 queue_init(&lq->vpl_queue);
489 lq->vpl_count = 0;
490 lq->vpl_internal_count = 0;
491 lq->vpl_external_count = 0;
492 }
493 vm_page_local_q_count = num_cpus;
494
495 vm_page_local_q = (struct vplq *)t_local_q;
496 }
497 }
498
499
500 /*
501 * vm_page_bootstrap:
502 *
503 * Initializes the resident memory module.
504 *
505 * Allocates memory for the page cells, and
506 * for the object/offset-to-page hash table headers.
507 * Each page cell is initialized and placed on the free list.
508 * Returns the range of available kernel virtual memory.
509 */
510
511 void
512 vm_page_bootstrap(
513 vm_offset_t *startp,
514 vm_offset_t *endp)
515 {
516 register vm_page_t m;
517 unsigned int i;
518 unsigned int log1;
519 unsigned int log2;
520 unsigned int size;
521
522 /*
523 * Initialize the vm_page template.
524 */
525
526 m = &vm_page_template;
527 bzero(m, sizeof (*m));
528
529 m->pageq.next = NULL;
530 m->pageq.prev = NULL;
531 m->listq.next = NULL;
532 m->listq.prev = NULL;
533 m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
534
535 m->object = VM_OBJECT_NULL; /* reset later */
536 m->offset = (vm_object_offset_t) -1; /* reset later */
537
538 m->wire_count = 0;
539 m->local = FALSE;
540 m->inactive = FALSE;
541 m->active = FALSE;
542 m->pageout_queue = FALSE;
543 m->speculative = FALSE;
544 m->laundry = FALSE;
545 m->free = FALSE;
546 m->reference = FALSE;
547 m->gobbled = FALSE;
548 m->private = FALSE;
549 m->throttled = FALSE;
550 m->__unused_pageq_bits = 0;
551
552 m->phys_page = 0; /* reset later */
553
554 m->busy = TRUE;
555 m->wanted = FALSE;
556 m->tabled = FALSE;
557 m->hashed = FALSE;
558 m->fictitious = FALSE;
559 m->pmapped = FALSE;
560 m->wpmapped = FALSE;
561 m->pageout = FALSE;
562 m->absent = FALSE;
563 m->error = FALSE;
564 m->dirty = FALSE;
565 m->cleaning = FALSE;
566 m->precious = FALSE;
567 m->clustered = FALSE;
568 m->overwriting = FALSE;
569 m->restart = FALSE;
570 m->unusual = FALSE;
571 m->encrypted = FALSE;
572 m->encrypted_cleaning = FALSE;
573 m->cs_validated = FALSE;
574 m->cs_tainted = FALSE;
575 m->no_cache = FALSE;
576 m->reusable = FALSE;
577 m->slid = FALSE;
578 m->xpmapped = FALSE;
579 m->compressor = FALSE;
580 m->written_by_kernel = FALSE;
581 m->__unused_object_bits = 0;
582
583 /*
584 * Initialize the page queues.
585 */
586 vm_page_init_lck_grp();
587
588 lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
589 lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
590 lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);
591
592 for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
593 int group;
594
595 purgeable_queues[i].token_q_head = 0;
596 purgeable_queues[i].token_q_tail = 0;
597 for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
598 queue_init(&purgeable_queues[i].objq[group]);
599
600 purgeable_queues[i].type = i;
601 purgeable_queues[i].new_pages = 0;
602 #if MACH_ASSERT
603 purgeable_queues[i].debug_count_tokens = 0;
604 purgeable_queues[i].debug_count_objects = 0;
605 #endif
606 };
607 purgeable_nonvolatile_count = 0;
608 queue_init(&purgeable_nonvolatile_queue);
609
610 for (i = 0; i < MAX_COLORS; i++ )
611 queue_init(&vm_page_queue_free[i]);
612
613 queue_init(&vm_lopage_queue_free);
614 queue_init(&vm_page_queue_active);
615 queue_init(&vm_page_queue_inactive);
616 queue_init(&vm_page_queue_cleaned);
617 queue_init(&vm_page_queue_throttled);
618 queue_init(&vm_page_queue_anonymous);
619
620 for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
621 queue_init(&vm_page_queue_speculative[i].age_q);
622
623 vm_page_queue_speculative[i].age_ts.tv_sec = 0;
624 vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
625 }
626 vm_page_free_wanted = 0;
627 vm_page_free_wanted_privileged = 0;
628
629 vm_page_set_colors();
630
631
632 /*
633 * Steal memory for the map and zone subsystems.
634 */
635 kernel_debug_string("zone_steal_memory");
636 zone_steal_memory();
637 kernel_debug_string("vm_map_steal_memory");
638 vm_map_steal_memory();
639
640 /*
641 * Allocate (and initialize) the virtual-to-physical
642 * table hash buckets.
643 *
644 * The number of buckets should be a power of two to
645 * get a good hash function. The following computation
646 * chooses the first power of two that is greater than
647 * or equal to the number of physical pages in the system.
648 */
649
650 if (vm_page_bucket_count == 0) {
651 unsigned int npages = pmap_free_pages();
652
653 vm_page_bucket_count = 1;
654 while (vm_page_bucket_count < npages)
655 vm_page_bucket_count <<= 1;
656 }
657 vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;
658
659 vm_page_hash_mask = vm_page_bucket_count - 1;
660
661 /*
662 * Calculate object shift value for hashing algorithm:
663 * O = log2(sizeof(struct vm_object))
664 * B = log2(vm_page_bucket_count)
665 * hash shifts the object left by
666 * B/2 - O
667 */
668 size = vm_page_bucket_count;
669 for (log1 = 0; size > 1; log1++)
670 size /= 2;
671 size = sizeof(struct vm_object);
672 for (log2 = 0; size > 1; log2++)
673 size /= 2;
674 vm_page_hash_shift = log1/2 - log2 + 1;
675
676 vm_page_bucket_hash = 1 << ((log1 + 1) >> 1); /* Get (ceiling of sqrt of table size) */
677 vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2); /* Get (ceiling of fourth root of table size) */
678 vm_page_bucket_hash |= 1; /* Set the low bit - it must always be 1 to ensure a unique series */
679
680 if (vm_page_hash_mask & vm_page_bucket_count)
681 printf("vm_page_bootstrap: WARNING -- strange page hash\n");
682
683 #if VM_PAGE_BUCKETS_CHECK
684 #if VM_PAGE_FAKE_BUCKETS
685 /*
686 * Allocate a decoy set of page buckets, to detect
687 * any stomping there.
688 */
689 vm_page_fake_buckets = (vm_page_bucket_t *)
690 pmap_steal_memory(vm_page_bucket_count *
691 sizeof(vm_page_bucket_t));
692 vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
693 vm_page_fake_buckets_end =
694 vm_map_round_page((vm_page_fake_buckets_start +
695 (vm_page_bucket_count *
696 sizeof (vm_page_bucket_t))),
697 PAGE_MASK);
698 char *cp;
699 for (cp = (char *)vm_page_fake_buckets_start;
700 cp < (char *)vm_page_fake_buckets_end;
701 cp++) {
702 *cp = 0x5a;
703 }
704 #endif /* VM_PAGE_FAKE_BUCKETS */
705 #endif /* VM_PAGE_BUCKETS_CHECK */
706
707 kernel_debug_string("vm_page_buckets");
708 vm_page_buckets = (vm_page_bucket_t *)
709 pmap_steal_memory(vm_page_bucket_count *
710 sizeof(vm_page_bucket_t));
711
712 kernel_debug_string("vm_page_bucket_locks");
713 vm_page_bucket_locks = (lck_spin_t *)
714 pmap_steal_memory(vm_page_bucket_lock_count *
715 sizeof(lck_spin_t));
716
717 for (i = 0; i < vm_page_bucket_count; i++) {
718 register vm_page_bucket_t *bucket = &vm_page_buckets[i];
719
720 bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
721 #if MACH_PAGE_HASH_STATS
722 bucket->cur_count = 0;
723 bucket->hi_count = 0;
724 #endif /* MACH_PAGE_HASH_STATS */
725 }
726
727 for (i = 0; i < vm_page_bucket_lock_count; i++)
728 lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);
729
730 #if VM_PAGE_BUCKETS_CHECK
731 vm_page_buckets_check_ready = TRUE;
732 #endif /* VM_PAGE_BUCKETS_CHECK */
733
734 /*
735 * Machine-dependent code allocates the resident page table.
736 * It uses vm_page_init to initialize the page frames.
737 * The code also returns to us the virtual space available
738 * to the kernel. We don't trust the pmap module
739 * to get the alignment right.
740 */
741
742 kernel_debug_string("pmap_startup");
743 pmap_startup(&virtual_space_start, &virtual_space_end);
744 virtual_space_start = round_page(virtual_space_start);
745 virtual_space_end = trunc_page(virtual_space_end);
746
747 *startp = virtual_space_start;
748 *endp = virtual_space_end;
749
750 /*
751 * Compute the initial "wire" count.
752 * Up until now, the pages which have been set aside are not under
753 * the VM system's control, so although they aren't explicitly
754 * wired, they nonetheless can't be moved. At this moment,
755 * all VM managed pages are "free", courtesy of pmap_startup.
756 */
757 assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
758 vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count; /* initial value */
759 vm_page_wire_count_initial = vm_page_wire_count;
760
761 printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
762 vm_page_free_count, vm_page_wire_count);
763
764 kernel_debug_string("vm_page_bootstrap complete");
765 simple_lock_init(&vm_paging_lock, 0);
766 }
767
768 #ifndef MACHINE_PAGES
769 /*
770 * We implement pmap_steal_memory and pmap_startup with the help
771 * of two simpler functions, pmap_virtual_space and pmap_next_page.
772 */
773
774 void *
775 pmap_steal_memory(
776 vm_size_t size)
777 {
778 vm_offset_t addr, vaddr;
779 ppnum_t phys_page;
780
781 /*
782 * Round the size up to a multiple of the pointer size.
783 */
784
785 size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);
786
787 /*
788 * If this is the first call to pmap_steal_memory,
789 * we have to initialize ourself.
790 */
791
792 if (virtual_space_start == virtual_space_end) {
793 pmap_virtual_space(&virtual_space_start, &virtual_space_end);
794
795 /*
796 * The initial values must be aligned properly, and
797 * we don't trust the pmap module to do it right.
798 */
799
800 virtual_space_start = round_page(virtual_space_start);
801 virtual_space_end = trunc_page(virtual_space_end);
802 }
803
804 /*
805 * Allocate virtual memory for this request.
806 */
807
808 addr = virtual_space_start;
809 virtual_space_start += size;
810
811 //kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size); /* (TEST/DEBUG) */
812
813 /*
814 * Allocate and map physical pages to back new virtual pages.
815 */
816
817 for (vaddr = round_page(addr);
818 vaddr < addr + size;
819 vaddr += PAGE_SIZE) {
820
821 if (!pmap_next_page_hi(&phys_page))
822 panic("pmap_steal_memory");
823
824 /*
825 * XXX Logically, these mappings should be wired,
826 * but some pmap modules barf if they are.
827 */
828 #if defined(__LP64__)
829 pmap_pre_expand(kernel_pmap, vaddr);
830 #endif
831
832 pmap_enter(kernel_pmap, vaddr, phys_page,
833 VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
834 VM_WIMG_USE_DEFAULT, FALSE);
835 /*
836 * Account for newly stolen memory
837 */
838 vm_page_wire_count++;
839
840 }
841
842 return (void *) addr;
843 }
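/*
 * Typical boot-time use, as in vm_page_bootstrap() above: the caller
 * sizes the request itself and treats the result as permanent, e.g.
 *
 *	vm_page_buckets = (vm_page_bucket_t *)
 *		pmap_steal_memory(vm_page_bucket_count * sizeof(vm_page_bucket_t));
 *
 * There is no corresponding free; stolen pages are counted as wired.
 */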
844
845 void vm_page_release_startup(vm_page_t mem);
846 void
847 pmap_startup(
848 vm_offset_t *startp,
849 vm_offset_t *endp)
850 {
851 unsigned int i, npages, pages_initialized, fill, fillval;
852 ppnum_t phys_page;
853 addr64_t tmpaddr;
854
855
856 #if defined(__LP64__)
857 /*
858 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
859 */
860 assert(sizeof(struct vm_page) == 64);
861
862 /*
863 * make sure we are aligned on a 64 byte boundary
864 * for VM_PAGE_PACK_PTR (it clips off the low-order
865 * 6 bits of the pointer)
866 */
867 if (virtual_space_start != virtual_space_end)
868 virtual_space_start = round_page(virtual_space_start);
869 #endif
870
871 /*
872 * We calculate how many page frames we will have
873 * and then allocate the page structures in one chunk.
874 */
875
876 tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE; /* Get the amount of memory left */
877 tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start); /* Account for any slop */
878 npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages))); /* How many pages fit, counting the vm_page_t needed for each */
879
880 vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);
881
882 /*
883 * Initialize the page frames.
884 */
885 kernel_debug_string("Initialize the page frames");
886 for (i = 0, pages_initialized = 0; i < npages; i++) {
887 if (!pmap_next_page(&phys_page))
888 break;
889 if (pages_initialized == 0 || phys_page < vm_page_lowest)
890 vm_page_lowest = phys_page;
891
892 vm_page_init(&vm_pages[i], phys_page, FALSE);
893 vm_page_pages++;
894 pages_initialized++;
895 }
896 vm_pages_count = pages_initialized;
897
898 #if defined(__LP64__)
899
900 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
901 panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);
902
903 if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
904 panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
905 #endif
906 kernel_debug_string("page fill/release");
907 /*
908 * Check if we want to initialize pages to a known value
909 */
910 fill = 0; /* Assume no fill */
911 if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
912 #if DEBUG
913 /* This slows down booting the DEBUG kernel, particularly on
914 * large memory systems, but is worthwhile in deterministically
915 * trapping uninitialized memory usage.
916 */
917 if (fill == 0) {
918 fill = 1;
919 fillval = 0xDEB8F177;
920 }
921 #endif
922 if (fill)
923 kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
924 // -debug code remove
925 if (2 == vm_himemory_mode) {
926 // free low -> high so high is preferred
927 for (i = 1; i <= pages_initialized; i++) {
928 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
929 vm_page_release_startup(&vm_pages[i - 1]);
930 }
931 }
932 else
933 // debug code remove-
934
935 /*
936 * Release pages in reverse order so that physical pages
937 * initially get allocated in ascending addresses. This keeps
938 * the devices (which must address physical memory) happy if
939 * they require several consecutive pages.
940 */
941 for (i = pages_initialized; i > 0; i--) {
942 if(fill) fillPage(vm_pages[i - 1].phys_page, fillval); /* Fill the page with a known value if requested at boot */
943 vm_page_release_startup(&vm_pages[i - 1]);
944 }
945
946 VM_CHECK_MEMORYSTATUS;
947
948 #if 0
949 {
950 vm_page_t xx, xxo, xxl;
951 int i, j, k, l;
952
953 j = 0; /* (BRINGUP) */
954 xxl = 0;
955
956 for( i = 0; i < vm_colors; i++ ) {
957 queue_iterate(&vm_page_queue_free[i],
958 xx,
959 vm_page_t,
960 pageq) { /* BRINGUP */
961 j++; /* (BRINGUP) */
962 if(j > vm_page_free_count) { /* (BRINGUP) */
963 panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
964 }
965
966 l = vm_page_free_count - j; /* (BRINGUP) */
967 k = 0; /* (BRINGUP) */
968
969 if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);
970
971 for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) { /* (BRINGUP) */
972 k++;
973 if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
974 if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) { /* (BRINGUP) */
975 panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
976 }
977 }
978
979 xxl = xx;
980 }
981 }
982
983 if(j != vm_page_free_count) { /* (BRINGUP) */
984 panic("pmap_startup: vm_page_free_count does not match, calc = %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
985 }
986 }
987 #endif
988
989
990 /*
991 * We have to re-align virtual_space_start,
992 * because pmap_steal_memory has been using it.
993 */
994
995 virtual_space_start = round_page(virtual_space_start);
996
997 *startp = virtual_space_start;
998 *endp = virtual_space_end;
999 }
1000 #endif /* MACHINE_PAGES */
1001
1002 /*
1003 * Routine: vm_page_module_init
1004 * Purpose:
1005 * Second initialization pass, to be done after
1006 * the basic VM system is ready.
1007 */
1008 void
1009 vm_page_module_init(void)
1010 {
1011 vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
1012 0, PAGE_SIZE, "vm pages");
1013
1014 #if ZONE_DEBUG
1015 zone_debug_disable(vm_page_zone);
1016 #endif /* ZONE_DEBUG */
1017
1018 zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
1019 zone_change(vm_page_zone, Z_EXPAND, FALSE);
1020 zone_change(vm_page_zone, Z_EXHAUST, TRUE);
1021 zone_change(vm_page_zone, Z_FOREIGN, TRUE);
1022 zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
1023 /*
1024 * Adjust zone statistics to account for the real pages allocated
1025 * in vm_page_create(). [Q: is this really what we want?]
1026 */
1027 vm_page_zone->count += vm_page_pages;
1028 vm_page_zone->sum_count += vm_page_pages;
1029 vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
1030 }
1031
1032 /*
1033 * Routine: vm_page_create
1034 * Purpose:
1035 * After the VM system is up, machine-dependent code
1036 * may stumble across more physical memory. For example,
1037 * memory that it was reserving for a frame buffer.
1038 * vm_page_create turns this memory into available pages.
1039 */
1040
1041 void
1042 vm_page_create(
1043 ppnum_t start,
1044 ppnum_t end)
1045 {
1046 ppnum_t phys_page;
1047 vm_page_t m;
1048
1049 for (phys_page = start;
1050 phys_page < end;
1051 phys_page++) {
1052 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page))
1053 == VM_PAGE_NULL)
1054 vm_page_more_fictitious();
1055
1056 m->fictitious = FALSE;
1057 pmap_clear_noencrypt(phys_page);
1058
1059 vm_page_pages++;
1060 vm_page_release(m);
1061 }
1062 }
1063
1064 /*
1065 * vm_page_hash:
1066 *
1067 * Distributes the object/offset key pair among hash buckets.
1068 *
1069 * NOTE: The bucket count must be a power of 2
1070 */
1071 #define vm_page_hash(object, offset) (\
1072 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\
1073 & vm_page_hash_mask)
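/*
 * Typical use, as in vm_page_insert_internal() below:
 *
 *	hash_id = vm_page_hash(object, offset);
 *	bucket = &vm_page_buckets[hash_id];
 *	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
 *
 * Because vm_page_bucket_count is a power of two, the final mask with
 * vm_page_hash_mask always yields a valid bucket index.
 */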
1074
1075
1076 /*
1077 * vm_page_insert: [ internal use only ]
1078 *
1079 * Inserts the given mem entry into the object/object-page
1080 * table and object list.
1081 *
1082 * The object must be locked.
1083 */
1084 void
1085 vm_page_insert(
1086 vm_page_t mem,
1087 vm_object_t object,
1088 vm_object_offset_t offset)
1089 {
1090 vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
1091 }
1092
1093 void
1094 vm_page_insert_internal(
1095 vm_page_t mem,
1096 vm_object_t object,
1097 vm_object_offset_t offset,
1098 boolean_t queues_lock_held,
1099 boolean_t insert_in_hash,
1100 boolean_t batch_pmap_op)
1101 {
1102 vm_page_bucket_t *bucket;
1103 lck_spin_t *bucket_lock;
1104 int hash_id;
1105 task_t owner;
1106
1107 XPR(XPR_VM_PAGE,
1108 "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
1109 object, offset, mem, 0,0);
1110 #if 0
1111 /*
1112 * we may not hold the page queue lock
1113 * so this check isn't safe to make
1114 */
1115 VM_PAGE_CHECK(mem);
1116 #endif
1117
1118 assert(page_aligned(offset));
1119
1120 /* the vm_submap_object is only a placeholder for submaps */
1121 assert(object != vm_submap_object);
1122
1123 vm_object_lock_assert_exclusive(object);
1124 #if DEBUG
1125 lck_mtx_assert(&vm_page_queue_lock,
1126 queues_lock_held ? LCK_MTX_ASSERT_OWNED
1127 : LCK_MTX_ASSERT_NOTOWNED);
1128 #endif /* DEBUG */
1129
1130 if (insert_in_hash == TRUE) {
1131 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1132 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1133 panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
1134 "already in (obj=%p,off=0x%llx)",
1135 mem, object, offset, mem->object, mem->offset);
1136 #endif
1137 assert(!object->internal || offset < object->vo_size);
1138
1139 /* only insert "pageout" pages into "pageout" objects,
1140 * and normal pages into normal objects */
1141 assert(object->pageout == mem->pageout);
1142
1143 assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);
1144
1145 /*
1146 * Record the object/offset pair in this page
1147 */
1148
1149 mem->object = object;
1150 mem->offset = offset;
1151
1152 /*
1153 * Insert it into the object/offset hash table
1154 */
1155 hash_id = vm_page_hash(object, offset);
1156 bucket = &vm_page_buckets[hash_id];
1157 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1158
1159 lck_spin_lock(bucket_lock);
1160
1161 mem->next_m = bucket->page_list;
1162 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1163 assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));
1164
1165 #if MACH_PAGE_HASH_STATS
1166 if (++bucket->cur_count > bucket->hi_count)
1167 bucket->hi_count = bucket->cur_count;
1168 #endif /* MACH_PAGE_HASH_STATS */
1169 mem->hashed = TRUE;
1170 lck_spin_unlock(bucket_lock);
1171 }
1172
1173 {
1174 unsigned int cache_attr;
1175
1176 cache_attr = object->wimg_bits & VM_WIMG_MASK;
1177
1178 if (cache_attr != VM_WIMG_USE_DEFAULT) {
1179 PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
1180 }
1181 }
1182 /*
1183 * Now link into the object's list of backed pages.
1184 */
1185 VM_PAGE_INSERT(mem, object);
1186 mem->tabled = TRUE;
1187
1188 /*
1189 * Show that the object has one more resident page.
1190 */
1191
1192 object->resident_page_count++;
1193 if (VM_PAGE_WIRED(mem)) {
1194 object->wired_page_count++;
1195 }
1196 assert(object->resident_page_count >= object->wired_page_count);
1197
1198 if (object->internal) {
1199 OSAddAtomic(1, &vm_page_internal_count);
1200 } else {
1201 OSAddAtomic(1, &vm_page_external_count);
1202 }
1203
1204 /*
1205 * It wouldn't make sense to insert a "reusable" page in
1206 * an object (the page would have been marked "reusable" only
1207 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
1208 * in the object at that time).
1209 * But a page could be inserted in an "all_reusable" object, if
1210 * something faults it in (a vm_read() from another task or a
1211 * "use-after-free" issue in user space, for example). It can
1212 * also happen if we're relocating a page from that object to
1213 * a different physical page during a physically-contiguous
1214 * allocation.
1215 */
1216 assert(!mem->reusable);
1217 if (mem->object->all_reusable) {
1218 OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
1219 }
1220
1221 if (object->purgable == VM_PURGABLE_DENY) {
1222 owner = TASK_NULL;
1223 } else {
1224 owner = object->vo_purgeable_owner;
1225 }
1226 if (owner &&
1227 (object->purgable == VM_PURGABLE_NONVOLATILE ||
1228 VM_PAGE_WIRED(mem))) {
1229 /* more non-volatile bytes */
1230 ledger_credit(owner->ledger,
1231 task_ledgers.purgeable_nonvolatile,
1232 PAGE_SIZE);
1233 /* more footprint */
1234 ledger_credit(owner->ledger,
1235 task_ledgers.phys_footprint,
1236 PAGE_SIZE);
1237
1238 } else if (owner &&
1239 (object->purgable == VM_PURGABLE_VOLATILE ||
1240 object->purgable == VM_PURGABLE_EMPTY)) {
1241 assert(! VM_PAGE_WIRED(mem));
1242 /* more volatile bytes */
1243 ledger_credit(owner->ledger,
1244 task_ledgers.purgeable_volatile,
1245 PAGE_SIZE);
1246 }
1247
1248 if (object->purgable == VM_PURGABLE_VOLATILE) {
1249 if (VM_PAGE_WIRED(mem)) {
1250 OSAddAtomic(+1, &vm_page_purgeable_wired_count);
1251 } else {
1252 OSAddAtomic(+1, &vm_page_purgeable_count);
1253 }
1254 } else if (object->purgable == VM_PURGABLE_EMPTY &&
1255 mem->throttled) {
1256 /*
1257 * This page belongs to a purged VM object but hasn't
1258 * been purged (because it was "busy").
1259 * It's in the "throttled" queue and hence not
1260 * visible to vm_pageout_scan(). Move it to a pageable
1261 * queue, so that it can eventually be reclaimed, instead
1262 * of lingering in the "empty" object.
1263 */
1264 if (queues_lock_held == FALSE)
1265 vm_page_lockspin_queues();
1266 vm_page_deactivate(mem);
1267 if (queues_lock_held == FALSE)
1268 vm_page_unlock_queues();
1269 }
1270
1271 #if VM_OBJECT_TRACKING_OP_MODIFIED
1272 if (vm_object_tracking_inited &&
1273 object->internal &&
1274 object->resident_page_count == 0 &&
1275 object->pager == NULL &&
1276 object->shadow != NULL &&
1277 object->shadow->copy == object) {
1278 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
1279 int numsaved = 0;
1280
1281 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
1282 btlog_add_entry(vm_object_tracking_btlog,
1283 object,
1284 VM_OBJECT_TRACKING_OP_MODIFIED,
1285 bt,
1286 numsaved);
1287 }
1288 #endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
1289 }
1290
1291 /*
1292 * vm_page_replace:
1293 *
1294 * Exactly like vm_page_insert, except that we first
1295 * remove any existing page at the given offset in object.
1296 *
1297 * The object must be locked.
1298 */
1299 void
1300 vm_page_replace(
1301 register vm_page_t mem,
1302 register vm_object_t object,
1303 register vm_object_offset_t offset)
1304 {
1305 vm_page_bucket_t *bucket;
1306 vm_page_t found_m = VM_PAGE_NULL;
1307 lck_spin_t *bucket_lock;
1308 int hash_id;
1309
1310 #if 0
1311 /*
1312 * we don't hold the page queue lock
1313 * so this check isn't safe to make
1314 */
1315 VM_PAGE_CHECK(mem);
1316 #endif
1317 vm_object_lock_assert_exclusive(object);
1318 #if DEBUG || VM_PAGE_CHECK_BUCKETS
1319 if (mem->tabled || mem->object != VM_OBJECT_NULL)
1320 panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
1321 "already in (obj=%p,off=0x%llx)",
1322 mem, object, offset, mem->object, mem->offset);
1323 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
1324 #endif
1325 /*
1326 * Record the object/offset pair in this page
1327 */
1328
1329 mem->object = object;
1330 mem->offset = offset;
1331
1332 /*
1333 * Insert it into the object/offset hash table,
1334 * replacing any page that might have been there.
1335 */
1336
1337 hash_id = vm_page_hash(object, offset);
1338 bucket = &vm_page_buckets[hash_id];
1339 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1340
1341 lck_spin_lock(bucket_lock);
1342
1343 if (bucket->page_list) {
1344 vm_page_packed_t *mp = &bucket->page_list;
1345 vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);
1346
1347 do {
1348 if (m->object == object && m->offset == offset) {
1349 /*
1350 * Remove old page from hash list
1351 */
1352 *mp = m->next_m;
1353 m->hashed = FALSE;
1354
1355 found_m = m;
1356 break;
1357 }
1358 mp = &m->next_m;
1359 } while ((m = VM_PAGE_UNPACK_PTR(*mp)));
1360
1361 mem->next_m = bucket->page_list;
1362 } else {
1363 mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
1364 }
1365 /*
1366 * insert new page at head of hash list
1367 */
1368 bucket->page_list = VM_PAGE_PACK_PTR(mem);
1369 mem->hashed = TRUE;
1370
1371 lck_spin_unlock(bucket_lock);
1372
1373 if (found_m) {
1374 /*
1375 * there was already a page at the specified
1376 * offset for this object... remove it from
1377 * the object and free it back to the free list
1378 */
1379 vm_page_free_unlocked(found_m, FALSE);
1380 }
1381 vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
1382 }
1383
1384 /*
1385 * vm_page_remove: [ internal use only ]
1386 *
1387 * Removes the given mem entry from the object/offset-page
1388 * table and the object page list.
1389 *
1390 * The object must be locked.
1391 */
1392
1393 void
1394 vm_page_remove(
1395 vm_page_t mem,
1396 boolean_t remove_from_hash)
1397 {
1398 vm_page_bucket_t *bucket;
1399 vm_page_t this;
1400 lck_spin_t *bucket_lock;
1401 int hash_id;
1402 task_t owner;
1403
1404 XPR(XPR_VM_PAGE,
1405 "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
1406 mem->object, mem->offset,
1407 mem, 0,0);
1408
1409 vm_object_lock_assert_exclusive(mem->object);
1410 assert(mem->tabled);
1411 assert(!mem->cleaning);
1412 assert(!mem->laundry);
1413 #if 0
1414 /*
1415 * we don't hold the page queue lock
1416 * so this check isn't safe to make
1417 */
1418 VM_PAGE_CHECK(mem);
1419 #endif
1420 if (remove_from_hash == TRUE) {
1421 /*
1422 * Remove from the object/offset hash table
1423 */
1424 hash_id = vm_page_hash(mem->object, mem->offset);
1425 bucket = &vm_page_buckets[hash_id];
1426 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1427
1428 lck_spin_lock(bucket_lock);
1429
1430 if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
1431 /* optimize for common case */
1432
1433 bucket->page_list = mem->next_m;
1434 } else {
1435 vm_page_packed_t *prev;
1436
1437 for (prev = &this->next_m;
1438 (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
1439 prev = &this->next_m)
1440 continue;
1441 *prev = this->next_m;
1442 }
1443 #if MACH_PAGE_HASH_STATS
1444 bucket->cur_count--;
1445 #endif /* MACH_PAGE_HASH_STATS */
1446 mem->hashed = FALSE;
1447 lck_spin_unlock(bucket_lock);
1448 }
1449 /*
1450 * Now remove from the object's list of backed pages.
1451 */
1452
1453 VM_PAGE_REMOVE(mem);
1454
1455 /*
1456 * And show that the object has one fewer resident
1457 * page.
1458 */
1459
1460 assert(mem->object->resident_page_count > 0);
1461 mem->object->resident_page_count--;
1462
1463 if (mem->object->internal) {
1464 #if DEBUG
1465 assert(vm_page_internal_count);
1466 #endif /* DEBUG */
1467
1468 OSAddAtomic(-1, &vm_page_internal_count);
1469 } else {
1470 assert(vm_page_external_count);
1471 OSAddAtomic(-1, &vm_page_external_count);
1472
1473 if (mem->xpmapped) {
1474 assert(vm_page_xpmapped_external_count);
1475 OSAddAtomic(-1, &vm_page_xpmapped_external_count);
1476 }
1477 }
1478 if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
1479 if (mem->object->resident_page_count == 0)
1480 vm_object_cache_remove(mem->object);
1481 }
1482
1483 if (VM_PAGE_WIRED(mem)) {
1484 assert(mem->object->wired_page_count > 0);
1485 mem->object->wired_page_count--;
1486 }
1487 assert(mem->object->resident_page_count >=
1488 mem->object->wired_page_count);
1489 if (mem->reusable) {
1490 assert(mem->object->reusable_page_count > 0);
1491 mem->object->reusable_page_count--;
1492 assert(mem->object->reusable_page_count <=
1493 mem->object->resident_page_count);
1494 mem->reusable = FALSE;
1495 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1496 vm_page_stats_reusable.reused_remove++;
1497 } else if (mem->object->all_reusable) {
1498 OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
1499 vm_page_stats_reusable.reused_remove++;
1500 }
1501
1502 if (mem->object->purgable == VM_PURGABLE_DENY) {
1503 owner = TASK_NULL;
1504 } else {
1505 owner = mem->object->vo_purgeable_owner;
1506 }
1507 if (owner &&
1508 (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
1509 VM_PAGE_WIRED(mem))) {
1510 /* less non-volatile bytes */
1511 ledger_debit(owner->ledger,
1512 task_ledgers.purgeable_nonvolatile,
1513 PAGE_SIZE);
1514 /* less footprint */
1515 ledger_debit(owner->ledger,
1516 task_ledgers.phys_footprint,
1517 PAGE_SIZE);
1518 } else if (owner &&
1519 (mem->object->purgable == VM_PURGABLE_VOLATILE ||
1520 mem->object->purgable == VM_PURGABLE_EMPTY)) {
1521 assert(! VM_PAGE_WIRED(mem));
1522 /* less volatile bytes */
1523 ledger_debit(owner->ledger,
1524 task_ledgers.purgeable_volatile,
1525 PAGE_SIZE);
1526 }
1527 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
1528 if (VM_PAGE_WIRED(mem)) {
1529 assert(vm_page_purgeable_wired_count > 0);
1530 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
1531 } else {
1532 assert(vm_page_purgeable_count > 0);
1533 OSAddAtomic(-1, &vm_page_purgeable_count);
1534 }
1535 }
1536 if (mem->object->set_cache_attr == TRUE)
1537 pmap_set_cache_attributes(mem->phys_page, 0);
1538
1539 mem->tabled = FALSE;
1540 mem->object = VM_OBJECT_NULL;
1541 mem->offset = (vm_object_offset_t) -1;
1542 }
1543
1544
1545 /*
1546 * vm_page_lookup:
1547 *
1548 * Returns the page associated with the object/offset
1549 * pair specified; if none is found, VM_PAGE_NULL is returned.
1550 *
1551 * The object must be locked. No side effects.
1552 */
1553
1554 unsigned long vm_page_lookup_hint = 0;
1555 unsigned long vm_page_lookup_hint_next = 0;
1556 unsigned long vm_page_lookup_hint_prev = 0;
1557 unsigned long vm_page_lookup_hint_miss = 0;
1558 unsigned long vm_page_lookup_bucket_NULL = 0;
1559 unsigned long vm_page_lookup_miss = 0;
1560
1561
1562 vm_page_t
1563 vm_page_lookup(
1564 vm_object_t object,
1565 vm_object_offset_t offset)
1566 {
1567 vm_page_t mem;
1568 vm_page_bucket_t *bucket;
1569 queue_entry_t qe;
1570 lck_spin_t *bucket_lock;
1571 int hash_id;
1572
1573 vm_object_lock_assert_held(object);
1574 mem = object->memq_hint;
1575
1576 if (mem != VM_PAGE_NULL) {
1577 assert(mem->object == object);
1578
1579 if (mem->offset == offset) {
1580 vm_page_lookup_hint++;
1581 return mem;
1582 }
1583 qe = queue_next(&mem->listq);
1584
1585 if (! queue_end(&object->memq, qe)) {
1586 vm_page_t next_page;
1587
1588 next_page = (vm_page_t) qe;
1589 assert(next_page->object == object);
1590
1591 if (next_page->offset == offset) {
1592 vm_page_lookup_hint_next++;
1593 object->memq_hint = next_page; /* new hint */
1594 return next_page;
1595 }
1596 }
1597 qe = queue_prev(&mem->listq);
1598
1599 if (! queue_end(&object->memq, qe)) {
1600 vm_page_t prev_page;
1601
1602 prev_page = (vm_page_t) qe;
1603 assert(prev_page->object == object);
1604
1605 if (prev_page->offset == offset) {
1606 vm_page_lookup_hint_prev++;
1607 object->memq_hint = prev_page; /* new hint */
1608 return prev_page;
1609 }
1610 }
1611 }
1612 /*
1613 * Search the hash table for this object/offset pair
1614 */
1615 hash_id = vm_page_hash(object, offset);
1616 bucket = &vm_page_buckets[hash_id];
1617
1618 /*
1619 * since we hold the object lock, we are guaranteed that no
1620 * new pages can be inserted into this object... this in turn
1621 * guarantees that the page we're looking for can't exist
1622 * if the bucket it hashes to is currently NULL, even when looked
1623 * at outside the scope of the hash bucket lock... this is a
1624 * really cheap optimization to avoid taking the lock
1625 */
1626 if (!bucket->page_list) {
1627 vm_page_lookup_bucket_NULL++;
1628
1629 return (VM_PAGE_NULL);
1630 }
1631 bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];
1632
1633 lck_spin_lock(bucket_lock);
1634
1635 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
1636 #if 0
1637 /*
1638 * we don't hold the page queue lock
1639 * so this check isn't safe to make
1640 */
1641 VM_PAGE_CHECK(mem);
1642 #endif
1643 if ((mem->object == object) && (mem->offset == offset))
1644 break;
1645 }
1646 lck_spin_unlock(bucket_lock);
1647
1648 if (mem != VM_PAGE_NULL) {
1649 if (object->memq_hint != VM_PAGE_NULL) {
1650 vm_page_lookup_hint_miss++;
1651 }
1652 assert(mem->object == object);
1653 object->memq_hint = mem;
1654 } else
1655 vm_page_lookup_miss++;
1656
1657 return(mem);
1658 }
1659
1660
1661 /*
1662 * vm_page_rename:
1663 *
1664 * Move the given memory entry from its
1665 * current object to the specified target object/offset.
1666 *
1667 * The object must be locked.
1668 */
1669 void
1670 vm_page_rename(
1671 register vm_page_t mem,
1672 register vm_object_t new_object,
1673 vm_object_offset_t new_offset,
1674 boolean_t encrypted_ok)
1675 {
1676 boolean_t internal_to_external, external_to_internal;
1677
1678 assert(mem->object != new_object);
1679
1680 /*
1681 * ENCRYPTED SWAP:
1682 * The encryption key is based on the page's memory object
1683 * (aka "pager") and paging offset. Moving the page to
1684 * another VM object changes its "pager" and "paging_offset"
1685 * so it has to be decrypted first, or we would lose the key.
1686 *
1687 * One exception is VM object collapsing, where we transfer pages
1688 * from one backing object to its parent object. This operation also
1689 * transfers the paging information, so the <pager,paging_offset> info
1690 * should remain consistent. The caller (vm_object_do_collapse())
1691 * sets "encrypted_ok" in this case.
1692 */
1693 if (!encrypted_ok && mem->encrypted) {
1694 panic("vm_page_rename: page %p is encrypted\n", mem);
1695 }
1696
1697 XPR(XPR_VM_PAGE,
1698 "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
1699 new_object, new_offset,
1700 mem, 0,0);
1701
1702 /*
1703 * Changes to mem->object require the page queues lock because
1704 * the pageout daemon uses that lock to get the object.
1705 */
1706 vm_page_lockspin_queues();
1707
1708 internal_to_external = FALSE;
1709 external_to_internal = FALSE;
1710
1711 if (mem->local) {
1712 /*
1713 * it's much easier to get the vm_page_pageable_xxx accounting correct
1714 * if we first move the page to the active queue... it's going to end
1715 * up there anyway, and we don't call vm_page_rename() frequently enough
1716 * for this to matter.
1717 */
1718 VM_PAGE_QUEUES_REMOVE(mem);
1719 vm_page_activate(mem);
1720 }
1721 if (mem->active || mem->inactive || mem->speculative) {
1722 if (mem->object->internal && !new_object->internal) {
1723 internal_to_external = TRUE;
1724 }
1725 if (!mem->object->internal && new_object->internal) {
1726 external_to_internal = TRUE;
1727 }
1728 }
1729
1730 vm_page_remove(mem, TRUE);
1731 vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);
1732
1733 if (internal_to_external) {
1734 vm_page_pageable_internal_count--;
1735 vm_page_pageable_external_count++;
1736 } else if (external_to_internal) {
1737 vm_page_pageable_external_count--;
1738 vm_page_pageable_internal_count++;
1739 }
1740
1741 vm_page_unlock_queues();
1742 }
1743
1744 /*
1745 * vm_page_init:
1746 *
1747 * Initialize the fields in a new page.
1748 * This takes a structure with random values and initializes it
1749 * so that it can be given to vm_page_release or vm_page_insert.
1750 */
1751 void
1752 vm_page_init(
1753 vm_page_t mem,
1754 ppnum_t phys_page,
1755 boolean_t lopage)
1756 {
1757 assert(phys_page);
1758
1759 #if DEBUG
1760 if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
1761 if (!(pmap_valid_page(phys_page))) {
1762 panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
1763 }
1764 }
1765 #endif
1766 *mem = vm_page_template;
1767 mem->phys_page = phys_page;
1768 #if 0
1769 /*
1770 * we're leaving this turned off for now... currently pages
1771 * come off the free list and are either immediately dirtied/referenced
1772 * due to zero-fill or COW faults, or are used to read or write files...
1773 * in the file I/O case, the UPL mechanism takes care of clearing
1774 * the state of the HW ref/mod bits in a somewhat fragile way.
1775 * Since we may change the way this works in the future (to toughen it up),
1776 * I'm leaving this as a reminder of where these bits could get cleared
1777 */
1778
1779 /*
1780 * make sure both the h/w referenced and modified bits are
1781 * clear at this point... we are especially dependent on
1782 * not finding a 'stale' h/w modified in a number of spots
1783 * once this page goes back into use
1784 */
1785 pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
1786 #endif
1787 mem->lopage = lopage;
1788 }
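/*
 * Example: pmap_startup() above initializes each stolen page frame with
 *
 *	vm_page_init(&vm_pages[i], phys_page, FALSE);
 *
 * before handing it to the free lists via vm_page_release_startup().
 */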
1789
1790 /*
1791 * vm_page_grab_fictitious:
1792 *
1793 * Remove a fictitious page from the free list.
1794 * Returns VM_PAGE_NULL if there are no free pages.
1795 */
1796 int c_vm_page_grab_fictitious = 0;
1797 int c_vm_page_grab_fictitious_failed = 0;
1798 int c_vm_page_release_fictitious = 0;
1799 int c_vm_page_more_fictitious = 0;
1800
1801 vm_page_t
1802 vm_page_grab_fictitious_common(
1803 ppnum_t phys_addr)
1804 {
1805 vm_page_t m;
1806
1807 if ((m = (vm_page_t)zget(vm_page_zone))) {
1808
1809 vm_page_init(m, phys_addr, FALSE);
1810 m->fictitious = TRUE;
1811
1812 c_vm_page_grab_fictitious++;
1813 } else
1814 c_vm_page_grab_fictitious_failed++;
1815
1816 return m;
1817 }
1818
1819 vm_page_t
1820 vm_page_grab_fictitious(void)
1821 {
1822 return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
1823 }
1824
1825 vm_page_t
1826 vm_page_grab_guard(void)
1827 {
1828 return vm_page_grab_fictitious_common(vm_page_guard_addr);
1829 }
1830
1831
1832 /*
1833 * vm_page_release_fictitious:
1834 *
1835 * Release a fictitious page to the zone pool
1836 */
1837 void
1838 vm_page_release_fictitious(
1839 vm_page_t m)
1840 {
1841 assert(!m->free);
1842 assert(m->fictitious);
1843 assert(m->phys_page == vm_page_fictitious_addr ||
1844 m->phys_page == vm_page_guard_addr);
1845
1846 c_vm_page_release_fictitious++;
1847
1848 zfree(vm_page_zone, m);
1849 }
1850
1851 /*
1852 * vm_page_more_fictitious:
1853 *
1854 * Add more fictitious pages to the zone.
1855 * Allowed to block. This routine is way intimate
1856 * with the zones code, for several reasons:
1857 * 1. we need to carve some page structures out of physical
1858 * memory before zones work, so they _cannot_ come from
1859 * the zone_map.
1860 * 2. the zone needs to be collectable in order to prevent
1861 * growth without bound. These structures are used by
1862 * the device pager (by the hundreds and thousands), as
1863 * private pages for pageout, and as blocking pages for
1864 * pagein. Temporary bursts in demand should not result in
1865 * permanent allocation of a resource.
1866 * 3. To smooth allocation humps, we allocate single pages
1867 * with kernel_memory_allocate(), and cram them into the
1868 * zone.
1869 */
1870
1871 void vm_page_more_fictitious(void)
1872 {
1873 vm_offset_t addr;
1874 kern_return_t retval;
1875
1876 c_vm_page_more_fictitious++;
1877
1878 /*
1879 * Allocate a single page from the zone_map. Do not wait if no physical
1880 * pages are immediately available, and do not zero the space. We need
1881 * our own blocking lock here to prevent multiple, simultaneous
1882 * requests from piling up on the zone_map lock. Exactly one
1883 * (of our) threads should be potentially waiting on the map lock.
1884 * If the winner is not vm-privileged, then the page allocation will fail,
1885 * and it will temporarily block here in vm_page_wait().
1886 */
1887 lck_mtx_lock(&vm_page_alloc_lock);
1888 /*
1889 * If another thread allocated space, just bail out now.
1890 */
1891 if (zone_free_count(vm_page_zone) > 5) {
1892 /*
1893 * The number "5" is a small number that is larger than the
1894 * number of fictitious pages that any single caller will
1895 * attempt to allocate. Otherwise, a thread will attempt to
1896 * acquire a fictitious page (vm_page_grab_fictitious), fail,
1897 * release all of the resources and locks already acquired,
1898 * and then call this routine. This routine finds the pages
1899 * that the caller released, and so declines to allocate new space.
1900 * The process would repeat indefinitely. The largest known number
1901 * of fictitious pages required in this manner is 2; 5 is
1902 * simply a somewhat larger number.
1903 */
1904 lck_mtx_unlock(&vm_page_alloc_lock);
1905 return;
1906 }
1907
1908 retval = kernel_memory_allocate(zone_map,
1909 &addr, PAGE_SIZE, VM_PROT_ALL,
1910 KMA_KOBJECT|KMA_NOPAGEWAIT);
1911 if (retval != KERN_SUCCESS) {
1912 /*
1913 * No page was available. Drop the
1914 * lock to give another thread a chance at it, and
1915 * wait for the pageout daemon to make progress.
1916 */
1917 lck_mtx_unlock(&vm_page_alloc_lock);
1918 vm_page_wait(THREAD_UNINT);
1919 return;
1920 }
1921
1922 /* Increment zone page count. We account for all memory managed by the zone in z->page_count */
1923 OSAddAtomic64(1, &(vm_page_zone->page_count));
1924
1925 zcram(vm_page_zone, addr, PAGE_SIZE);
1926
1927 lck_mtx_unlock(&vm_page_alloc_lock);
1928 }
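
/*
 * Illustrative sketch (not part of the original file): the retry pattern
 * the comment above describes.  A hypothetical caller keeps trying
 * vm_page_grab_fictitious() and replenishes the zone via
 * vm_page_more_fictitious() when the grab fails.  Kept under #if 0 so it
 * is never compiled; a real caller's details may differ.
 */
#if 0
static vm_page_t
example_grab_fictitious_retry(void)
{
	vm_page_t	m;

	for (;;) {
		m = vm_page_grab_fictitious();
		if (m != VM_PAGE_NULL)
			return m;		/* got one from the zone */
		/*
		 * Zone was empty: add a page's worth of vm_page
		 * structures (may block) and try again.
		 */
		vm_page_more_fictitious();
	}
}
#endif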
1929
1930
1931 /*
1932 * vm_pool_low():
1933 *
1934 * Return true if it is not likely that a non-vm_privileged thread
1935 * can get memory without blocking. Advisory only, since the
1936 * situation may change under us.
1937 */
1938 int
1939 vm_pool_low(void)
1940 {
1941 /* No locking, at worst we will fib. */
1942 return( vm_page_free_count <= vm_page_free_reserved );
1943 }
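
/*
 * Illustrative sketch (not part of the original file): vm_pool_low() is
 * advisory only, so a hypothetical caller should use it to defer optional
 * work, never to make a correctness decision.
 */
#if 0
static void
example_optional_readahead(void)
{
	if (vm_pool_low()) {
		/* memory looks tight; skip the speculative work */
		return;
	}
	/* ... otherwise go ahead and allocate pages for read-ahead ... */
}
#endif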
1944
1945
1946
1947 /*
1948 * this is an interface to support bring-up of drivers
1949 * on platforms with physical memory > 4G...
1950 */
1951 int vm_himemory_mode = 2;
1952
1953
1954 /*
1955 * this interface exists to support hardware controllers
1956 * incapable of generating DMAs with more than 32 bits
1957 * of address on platforms with physical memory > 4G...
1958 */
1959 unsigned int vm_lopages_allocated_q = 0;
1960 unsigned int vm_lopages_allocated_cpm_success = 0;
1961 unsigned int vm_lopages_allocated_cpm_failed = 0;
1962 queue_head_t vm_lopage_queue_free;
1963
1964 vm_page_t
1965 vm_page_grablo(void)
1966 {
1967 vm_page_t mem;
1968
1969 if (vm_lopage_needed == FALSE)
1970 return (vm_page_grab());
1971
1972 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1973
1974 if ( !queue_empty(&vm_lopage_queue_free)) {
1975 queue_remove_first(&vm_lopage_queue_free,
1976 mem,
1977 vm_page_t,
1978 pageq);
1979 assert(vm_lopage_free_count);
1980
1981 vm_lopage_free_count--;
1982 vm_lopages_allocated_q++;
1983
1984 if (vm_lopage_free_count < vm_lopage_lowater)
1985 vm_lopage_refill = TRUE;
1986
1987 lck_mtx_unlock(&vm_page_queue_free_lock);
1988 } else {
1989 lck_mtx_unlock(&vm_page_queue_free_lock);
1990
1991 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) {
1992
1993 lck_mtx_lock_spin(&vm_page_queue_free_lock);
1994 vm_lopages_allocated_cpm_failed++;
1995 lck_mtx_unlock(&vm_page_queue_free_lock);
1996
1997 return (VM_PAGE_NULL);
1998 }
1999 mem->busy = TRUE;
2000
2001 vm_page_lockspin_queues();
2002
2003 mem->gobbled = FALSE;
2004 vm_page_gobble_count--;
2005 vm_page_wire_count--;
2006
2007 vm_lopages_allocated_cpm_success++;
2008 vm_page_unlock_queues();
2009 }
2010 assert(mem->busy);
2011 assert(!mem->free);
2012 assert(!mem->pmapped);
2013 assert(!mem->wpmapped);
2014 assert(!pmap_is_noencrypt(mem->phys_page));
2015
2016 mem->pageq.next = NULL;
2017 mem->pageq.prev = NULL;
2018
2019 return (mem);
2020 }
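
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * driver path that needs a page addressable by a 32-bit DMA engine.
 * When no low-memory pool is maintained (vm_lopage_needed == FALSE),
 * vm_page_grablo() simply falls back to vm_page_grab(), since every
 * physical page is already below 4G in that case.
 */
#if 0
static vm_page_t
example_grab_dma32_page(void)
{
	vm_page_t	m;

	m = vm_page_grablo();
	if (m == VM_PAGE_NULL)
		return VM_PAGE_NULL;	/* caller must handle failure */

	/* m->phys_page is expected to be reachable by a 32-bit DMA engine */
	return m;
}
#endif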
2021
2022
2023 /*
2024 * vm_page_grab:
2025 *
2026 * first try to grab a page from the per-cpu free list...
2027 * this must be done while pre-emption is disabled... if
2028 * a page is available, we're done...
2029 * if no page is available, grab the vm_page_queue_free_lock
2030 * and see if current number of free pages would allow us
2031 * to grab at least 1... if not, return VM_PAGE_NULL as before...
2032 * if there are pages available, disable preemption and
2033 * recheck the state of the per-cpu free list... we could
2034 * have been preempted and moved to a different cpu, or
2035 * some other thread could have re-filled it... if still
2036 * empty, figure out how many pages we can steal from the
2037 * global free queue and move to the per-cpu queue...
2038 * return one of these pages when done... only wake up the
2039 * pageout_scan thread if we moved pages from the global
2040 * list... no need for the wakeup if we've satisfied the
2041 * request from the per-cpu queue.
2042 */
2043
2044
2045 vm_page_t
2046 vm_page_grab( void )
2047 {
2048 vm_page_t mem;
2049
2050
2051 disable_preemption();
2052
2053 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2054 return_page_from_cpu_list:
2055 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2056 PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;
2057
2058 enable_preemption();
2059 mem->pageq.next = NULL;
2060
2061 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2062 assert(mem->tabled == FALSE);
2063 assert(mem->object == VM_OBJECT_NULL);
2064 assert(!mem->laundry);
2065 assert(!mem->free);
2066 assert(pmap_verify_free(mem->phys_page));
2067 assert(mem->busy);
2068 assert(!mem->encrypted);
2069 assert(!mem->pmapped);
2070 assert(!mem->wpmapped);
2071 assert(!mem->active);
2072 assert(!mem->inactive);
2073 assert(!mem->throttled);
2074 assert(!mem->speculative);
2075 assert(!pmap_is_noencrypt(mem->phys_page));
2076
2077 return mem;
2078 }
2079 enable_preemption();
2080
2081
2082 /*
2083 * Optionally produce warnings if the wire or gobble
2084 * counts exceed some threshold.
2085 */
2086 #if VM_PAGE_WIRE_COUNT_WARNING
2087 if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
2088 printf("mk: vm_page_grab(): high wired page count of %d\n",
2089 vm_page_wire_count);
2090 }
2091 #endif
2092 #if VM_PAGE_GOBBLE_COUNT_WARNING
2093 if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
2094 printf("mk: vm_page_grab(): high gobbled page count of %d\n",
2095 vm_page_gobble_count);
2096 }
2097 #endif
2098 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2099
2100 /*
2101 * Only let privileged threads (involved in pageout)
2102 * dip into the reserved pool.
2103 */
2104 if ((vm_page_free_count < vm_page_free_reserved) &&
2105 !(current_thread()->options & TH_OPT_VMPRIV)) {
2106 lck_mtx_unlock(&vm_page_queue_free_lock);
2107 mem = VM_PAGE_NULL;
2108 }
2109 else {
2110 vm_page_t head;
2111 vm_page_t tail;
2112 unsigned int pages_to_steal;
2113 unsigned int color;
2114
2115 while ( vm_page_free_count == 0 ) {
2116
2117 lck_mtx_unlock(&vm_page_queue_free_lock);
2118 /*
2119 * must be a privileged thread to be
2120 * in this state since a non-privileged
2121 * thread would have bailed if we were
2122 * under the vm_page_free_reserved mark
2123 */
2124 VM_PAGE_WAIT();
2125 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2126 }
2127
2128 disable_preemption();
2129
2130 if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
2131 lck_mtx_unlock(&vm_page_queue_free_lock);
2132
2133 /*
2134 * we got preempted and moved to another processor
2135 * or we got preempted and someone else ran and filled the cache
2136 */
2137 goto return_page_from_cpu_list;
2138 }
2139 if (vm_page_free_count <= vm_page_free_reserved)
2140 pages_to_steal = 1;
2141 else {
2142 if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
2143 pages_to_steal = vm_free_magazine_refill_limit;
2144 else
2145 pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
2146 }
2147 color = PROCESSOR_DATA(current_processor(), start_color);
2148 head = tail = NULL;
2149
2150 vm_page_free_count -= pages_to_steal;
2151
2152 while (pages_to_steal--) {
2153
2154 while (queue_empty(&vm_page_queue_free[color]))
2155 color = (color + 1) & vm_color_mask;
2156
2157 queue_remove_first(&vm_page_queue_free[color],
2158 mem,
2159 vm_page_t,
2160 pageq);
2161 mem->pageq.next = NULL;
2162 mem->pageq.prev = NULL;
2163
2164 assert(!mem->active);
2165 assert(!mem->inactive);
2166 assert(!mem->throttled);
2167 assert(!mem->speculative);
2168
2169 color = (color + 1) & vm_color_mask;
2170
2171 if (head == NULL)
2172 head = mem;
2173 else
2174 tail->pageq.next = (queue_t)mem;
2175 tail = mem;
2176
2177 assert(mem->listq.next == NULL && mem->listq.prev == NULL);
2178 assert(mem->tabled == FALSE);
2179 assert(mem->object == VM_OBJECT_NULL);
2180 assert(!mem->laundry);
2181 assert(mem->free);
2182 mem->free = FALSE;
2183
2184 assert(pmap_verify_free(mem->phys_page));
2185 assert(mem->busy);
2186 assert(!mem->free);
2187 assert(!mem->encrypted);
2188 assert(!mem->pmapped);
2189 assert(!mem->wpmapped);
2190 assert(!pmap_is_noencrypt(mem->phys_page));
2191 }
2192 lck_mtx_unlock(&vm_page_queue_free_lock);
2193
2194 PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
2195 PROCESSOR_DATA(current_processor(), start_color) = color;
2196
2197 /*
2198 * satisfy this request
2199 */
2200 PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
2201 mem = head;
2202 mem->pageq.next = NULL;
2203
2204 enable_preemption();
2205 }
2206 /*
2207 * Decide if we should poke the pageout daemon.
2208 * We do this if the free count is less than the low
2209 * water mark, or if the free count is less than the high
2210 * water mark (but above the low water mark) and the inactive
2211 * count is less than its target.
2212 *
2213 * We don't have the counts locked ... if they change a little,
2214 * it doesn't really matter.
2215 */
2216 if ((vm_page_free_count < vm_page_free_min) ||
2217 ((vm_page_free_count < vm_page_free_target) &&
2218 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
2219 thread_wakeup((event_t) &vm_page_free_wanted);
2220
2221 VM_CHECK_MEMORYSTATUS;
2222
2223 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4); /* (TEST/DEBUG) */
2224
2225 return mem;
2226 }
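
/*
 * Illustrative sketch (not part of the original file): the grab-or-wait
 * loop implied by the comment above.  A hypothetical caller that is
 * allowed to block retries vm_page_grab() after each successful
 * vm_page_wait(); an interrupted wait aborts the loop.
 */
#if 0
static vm_page_t
example_grab_page_can_block(void)
{
	vm_page_t	m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (!vm_page_wait(THREAD_INTERRUPTIBLE))
			return VM_PAGE_NULL;	/* interrupted; give up */
	}
	return m;
}
#endif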
2227
2228 /*
2229 * vm_page_release:
2230 *
2231 * Return a page to the free list.
2232 */
2233
2234 void
2235 vm_page_release(
2236 register vm_page_t mem)
2237 {
2238 unsigned int color;
2239 int need_wakeup = 0;
2240 int need_priv_wakeup = 0;
2241
2242
2243 assert(!mem->private && !mem->fictitious);
2244 if (vm_page_free_verify) {
2245 assert(pmap_verify_free(mem->phys_page));
2246 }
2247 // dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5); /* (TEST/DEBUG) */
2248
2249 pmap_clear_noencrypt(mem->phys_page);
2250
2251 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2252 #if DEBUG
2253 if (mem->free)
2254 panic("vm_page_release");
2255 #endif
2256
2257 assert(mem->busy);
2258 assert(!mem->laundry);
2259 assert(mem->object == VM_OBJECT_NULL);
2260 assert(mem->pageq.next == NULL &&
2261 mem->pageq.prev == NULL);
2262 assert(mem->listq.next == NULL &&
2263 mem->listq.prev == NULL);
2264
2265 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2266 vm_lopage_free_count < vm_lopage_free_limit &&
2267 mem->phys_page < max_valid_low_ppnum) {
2268 /*
2269 * this exists to support hardware controllers
2270 * incapable of generating DMAs with more than 32 bits
2271 * of address on platforms with physical memory > 4G...
2272 */
2273 queue_enter_first(&vm_lopage_queue_free,
2274 mem,
2275 vm_page_t,
2276 pageq);
2277 vm_lopage_free_count++;
2278
2279 if (vm_lopage_free_count >= vm_lopage_free_limit)
2280 vm_lopage_refill = FALSE;
2281
2282 mem->lopage = TRUE;
2283 } else {
2284 mem->lopage = FALSE;
2285 mem->free = TRUE;
2286
2287 color = mem->phys_page & vm_color_mask;
2288 queue_enter_first(&vm_page_queue_free[color],
2289 mem,
2290 vm_page_t,
2291 pageq);
2292 vm_page_free_count++;
2293 /*
2294 * Check if we should wake up someone waiting for page.
2295 * But don't bother waking them unless they can allocate.
2296 *
2297 * We wakeup only one thread, to prevent starvation.
2298 * Because the scheduling system handles wait queues FIFO,
2299 * if we wake up all waiting threads, one greedy thread
2300 * can starve multiple well-behaved threads. When the threads
2301 * all wake up, the greedy thread runs first, grabs the page,
2302 * and waits for another page. It will be the first to run
2303 * when the next page is freed.
2304 *
2305 * However, there is a slight danger here.
2306 * The thread we wake might not use the free page.
2307 * Then the other threads could wait indefinitely
2308 * while the page goes unused. To forestall this,
2309 * the pageout daemon will keep making free pages
2310 * as long as vm_page_free_wanted is non-zero.
2311 */
2312
2313 assert(vm_page_free_count > 0);
2314 if (vm_page_free_wanted_privileged > 0) {
2315 vm_page_free_wanted_privileged--;
2316 need_priv_wakeup = 1;
2317 } else if (vm_page_free_wanted > 0 &&
2318 vm_page_free_count > vm_page_free_reserved) {
2319 vm_page_free_wanted--;
2320 need_wakeup = 1;
2321 }
2322 }
2323 lck_mtx_unlock(&vm_page_queue_free_lock);
2324
2325 if (need_priv_wakeup)
2326 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged);
2327 else if (need_wakeup)
2328 thread_wakeup_one((event_t) &vm_page_free_count);
2329
2330 VM_CHECK_MEMORYSTATUS;
2331 }
2332
2333 /*
2334 * This version of vm_page_release() is used only at startup
2335 * when we are single-threaded and pages are being released
2336 * for the first time. Hence, no locking is done and unnecessary checks are skipped.
2337 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller.
2338 */
2339 void
2340 vm_page_release_startup(
2341 register vm_page_t mem)
2342 {
2343 queue_t queue_free;
2344
2345 if (vm_lopage_free_count < vm_lopage_free_limit &&
2346 mem->phys_page < max_valid_low_ppnum) {
2347 mem->lopage = TRUE;
2348 vm_lopage_free_count++;
2349 queue_free = &vm_lopage_queue_free;
2350 } else {
2351 mem->lopage = FALSE;
2352 mem->free = TRUE;
2353 vm_page_free_count++;
2354 queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask];
2355 }
2356 queue_enter_first(queue_free, mem, vm_page_t, pageq);
2357 }
2358
2359 /*
2360 * vm_page_wait:
2361 *
2362 * Wait for a page to become available.
2363 * If there are plenty of free pages, then we don't sleep.
2364 *
2365 * Returns:
2366 * TRUE: There may be another page, try again
2367 * FALSE: We were interrupted out of our wait, don't try again
2368 */
2369
2370 boolean_t
2371 vm_page_wait(
2372 int interruptible )
2373 {
2374 /*
2375 * We can't use vm_page_free_reserved to make this
2376 * determination. Consider: some thread might
2377 * need to allocate two pages. The first allocation
2378 * succeeds, the second fails. After the first page is freed,
2379 * a call to vm_page_wait must really block.
2380 */
2381 kern_return_t wait_result;
2382 int need_wakeup = 0;
2383 int is_privileged = current_thread()->options & TH_OPT_VMPRIV;
2384
2385 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2386
2387 if (is_privileged && vm_page_free_count) {
2388 lck_mtx_unlock(&vm_page_queue_free_lock);
2389 return TRUE;
2390 }
2391 if (vm_page_free_count < vm_page_free_target) {
2392
2393 if (is_privileged) {
2394 if (vm_page_free_wanted_privileged++ == 0)
2395 need_wakeup = 1;
2396 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible);
2397 } else {
2398 if (vm_page_free_wanted++ == 0)
2399 need_wakeup = 1;
2400 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible);
2401 }
2402 lck_mtx_unlock(&vm_page_queue_free_lock);
2403 counter(c_vm_page_wait_block++);
2404
2405 if (need_wakeup)
2406 thread_wakeup((event_t)&vm_page_free_wanted);
2407
2408 if (wait_result == THREAD_WAITING) {
2409 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START,
2410 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0);
2411 wait_result = thread_block(THREAD_CONTINUE_NULL);
2412 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0);
2413 }
2414
2415 return(wait_result == THREAD_AWAKENED);
2416 } else {
2417 lck_mtx_unlock(&vm_page_queue_free_lock);
2418 return TRUE;
2419 }
2420 }
2421
2422 /*
2423 * vm_page_alloc:
2424 *
2425 * Allocate and return a memory cell associated
2426 * with this VM object/offset pair.
2427 *
2428 * Object must be locked.
2429 */
2430
2431 vm_page_t
2432 vm_page_alloc(
2433 vm_object_t object,
2434 vm_object_offset_t offset)
2435 {
2436 register vm_page_t mem;
2437
2438 vm_object_lock_assert_exclusive(object);
2439 mem = vm_page_grab();
2440 if (mem == VM_PAGE_NULL)
2441 return VM_PAGE_NULL;
2442
2443 vm_page_insert(mem, object, offset);
2444
2445 return(mem);
2446 }
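
/*
 * Illustrative sketch (not part of the original file): vm_page_alloc()
 * requires the caller to hold the object lock exclusively.  The
 * vm_object_lock()/vm_object_unlock() calls below are the usual VM object
 * locking primitives; a real caller typically already holds the lock.
 */
#if 0
static vm_page_t
example_alloc_into_object(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	m;

	vm_object_lock(object);			/* exclusive, as asserted by vm_page_alloc() */
	m = vm_page_alloc(object, offset);	/* VM_PAGE_NULL if no free page is available */
	vm_object_unlock(object);

	return m;
}
#endif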
2447
2448 vm_page_t
2449 vm_page_alloclo(
2450 vm_object_t object,
2451 vm_object_offset_t offset)
2452 {
2453 register vm_page_t mem;
2454
2455 vm_object_lock_assert_exclusive(object);
2456 mem = vm_page_grablo();
2457 if (mem == VM_PAGE_NULL)
2458 return VM_PAGE_NULL;
2459
2460 vm_page_insert(mem, object, offset);
2461
2462 return(mem);
2463 }
2464
2465
2466 /*
2467 * vm_page_alloc_guard:
2468 *
2469 * Allocate a fictitious page which will be used
2470 * as a guard page. The page will be inserted into
2471 * the object and returned to the caller.
2472 */
2473
2474 vm_page_t
2475 vm_page_alloc_guard(
2476 vm_object_t object,
2477 vm_object_offset_t offset)
2478 {
2479 register vm_page_t mem;
2480
2481 vm_object_lock_assert_exclusive(object);
2482 mem = vm_page_grab_guard();
2483 if (mem == VM_PAGE_NULL)
2484 return VM_PAGE_NULL;
2485
2486 vm_page_insert(mem, object, offset);
2487
2488 return(mem);
2489 }
2490
2491
2492 counter(unsigned int c_laundry_pages_freed = 0;)
2493
2494 /*
2495 * vm_page_free_prepare:
2496 *
2497 * Removes page from any queue it may be on
2498 * and disassociates it from its VM object.
2499 *
2500 * Object and page queues must be locked prior to entry.
2501 */
2502 static void
2503 vm_page_free_prepare(
2504 vm_page_t mem)
2505 {
2506 vm_page_free_prepare_queues(mem);
2507 vm_page_free_prepare_object(mem, TRUE);
2508 }
2509
2510
2511 void
2512 vm_page_free_prepare_queues(
2513 vm_page_t mem)
2514 {
2515 VM_PAGE_CHECK(mem);
2516 assert(!mem->free);
2517 assert(!mem->cleaning);
2518
2519 #if MACH_ASSERT || DEBUG
2520 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2521 if (mem->free)
2522 panic("vm_page_free: freeing page on free list\n");
2523 #endif /* MACH_ASSERT || DEBUG */
2524 if (mem->object) {
2525 vm_object_lock_assert_exclusive(mem->object);
2526 }
2527 if (mem->laundry) {
2528 /*
2529 * We may have to free a page while it's being laundered
2530 * if we lost its pager (due to a forced unmount, for example).
2531 * We need to call vm_pageout_steal_laundry() before removing
2532 * the page from its VM object, so that we can remove it
2533 * from its pageout queue and adjust the laundry accounting
2534 */
2535 vm_pageout_steal_laundry(mem, TRUE);
2536 counter(++c_laundry_pages_freed);
2537 }
2538
2539 VM_PAGE_QUEUES_REMOVE(mem); /* clears local/active/inactive/throttled/speculative */
2540
2541 if (VM_PAGE_WIRED(mem)) {
2542 if (mem->object) {
2543 assert(mem->object->wired_page_count > 0);
2544 mem->object->wired_page_count--;
2545 assert(mem->object->resident_page_count >=
2546 mem->object->wired_page_count);
2547
2548 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2549 OSAddAtomic(+1, &vm_page_purgeable_count);
2550 assert(vm_page_purgeable_wired_count > 0);
2551 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2552 }
2553 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2554 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2555 mem->object->vo_purgeable_owner != TASK_NULL) {
2556 task_t owner;
2557
2558 owner = mem->object->vo_purgeable_owner;
2559 /*
2560 * While wired, this page was accounted
2561 * as "non-volatile" but it should now
2562 * be accounted as "volatile".
2563 */
2564 /* one less "non-volatile"... */
2565 ledger_debit(owner->ledger,
2566 task_ledgers.purgeable_nonvolatile,
2567 PAGE_SIZE);
2568 /* ... and "phys_footprint" */
2569 ledger_debit(owner->ledger,
2570 task_ledgers.phys_footprint,
2571 PAGE_SIZE);
2572 /* one more "volatile" */
2573 ledger_credit(owner->ledger,
2574 task_ledgers.purgeable_volatile,
2575 PAGE_SIZE);
2576 }
2577 }
2578 if (!mem->private && !mem->fictitious)
2579 vm_page_wire_count--;
2580 mem->wire_count = 0;
2581 assert(!mem->gobbled);
2582 } else if (mem->gobbled) {
2583 if (!mem->private && !mem->fictitious)
2584 vm_page_wire_count--;
2585 vm_page_gobble_count--;
2586 }
2587 }
2588
2589
2590 void
2591 vm_page_free_prepare_object(
2592 vm_page_t mem,
2593 boolean_t remove_from_hash)
2594 {
2595 if (mem->tabled)
2596 vm_page_remove(mem, remove_from_hash); /* clears tabled, object, offset */
2597
2598 PAGE_WAKEUP(mem); /* clears wanted */
2599
2600 if (mem->private) {
2601 mem->private = FALSE;
2602 mem->fictitious = TRUE;
2603 mem->phys_page = vm_page_fictitious_addr;
2604 }
2605 if ( !mem->fictitious) {
2606 vm_page_init(mem, mem->phys_page, mem->lopage);
2607 }
2608 }
2609
2610
2611 /*
2612 * vm_page_free:
2613 *
2614 * Returns the given page to the free list,
2615 * disassociating it from any VM object.
2616 *
2617 * Object and page queues must be locked prior to entry.
2618 */
2619 void
2620 vm_page_free(
2621 vm_page_t mem)
2622 {
2623 vm_page_free_prepare(mem);
2624
2625 if (mem->fictitious) {
2626 vm_page_release_fictitious(mem);
2627 } else {
2628 vm_page_release(mem);
2629 }
2630 }
2631
2632
2633 void
2634 vm_page_free_unlocked(
2635 vm_page_t mem,
2636 boolean_t remove_from_hash)
2637 {
2638 vm_page_lockspin_queues();
2639 vm_page_free_prepare_queues(mem);
2640 vm_page_unlock_queues();
2641
2642 vm_page_free_prepare_object(mem, remove_from_hash);
2643
2644 if (mem->fictitious) {
2645 vm_page_release_fictitious(mem);
2646 } else {
2647 vm_page_release(mem);
2648 }
2649 }
2650
2651
2652 /*
2653 * Free a list of pages. The list can be up to several hundred pages,
2654 * as batched up by vm_pageout_scan().
2655 * The big win is not having to take the free list lock once
2656 * per page.
2657 */
2658 void
2659 vm_page_free_list(
2660 vm_page_t freeq,
2661 boolean_t prepare_object)
2662 {
2663 vm_page_t mem;
2664 vm_page_t nxt;
2665 vm_page_t local_freeq;
2666 int pg_count;
2667
2668 while (freeq) {
2669
2670 pg_count = 0;
2671 local_freeq = VM_PAGE_NULL;
2672 mem = freeq;
2673
2674 /*
2675 * break up the processing into smaller chunks so
2676 * that we can 'pipeline' the pages onto the
2677 * free list w/o introducing too much
2678 * contention on the global free queue lock
2679 */
2680 while (mem && pg_count < 64) {
2681
2682 assert(!mem->inactive);
2683 assert(!mem->active);
2684 assert(!mem->throttled);
2685 assert(!mem->free);
2686 assert(!mem->speculative);
2687 assert(!VM_PAGE_WIRED(mem));
2688 assert(mem->pageq.prev == NULL);
2689
2690 nxt = (vm_page_t)(mem->pageq.next);
2691
2692 if (vm_page_free_verify && !mem->fictitious && !mem->private) {
2693 assert(pmap_verify_free(mem->phys_page));
2694 }
2695 if (prepare_object == TRUE)
2696 vm_page_free_prepare_object(mem, TRUE);
2697
2698 if (!mem->fictitious) {
2699 assert(mem->busy);
2700
2701 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
2702 vm_lopage_free_count < vm_lopage_free_limit &&
2703 mem->phys_page < max_valid_low_ppnum) {
2704 mem->pageq.next = NULL;
2705 vm_page_release(mem);
2706 } else {
2707 /*
2708 * IMPORTANT: we can't set the page "free" here
2709 * because that would make the page eligible for
2710 * a physically-contiguous allocation (see
2711 * vm_page_find_contiguous()) right away (we don't
2712 * hold the vm_page_queue_free lock). That would
2713 * cause trouble because the page is not actually
2714 * in the free queue yet...
2715 */
2716 mem->pageq.next = (queue_entry_t)local_freeq;
2717 local_freeq = mem;
2718 pg_count++;
2719
2720 pmap_clear_noencrypt(mem->phys_page);
2721 }
2722 } else {
2723 assert(mem->phys_page == vm_page_fictitious_addr ||
2724 mem->phys_page == vm_page_guard_addr);
2725 vm_page_release_fictitious(mem);
2726 }
2727 mem = nxt;
2728 }
2729 freeq = mem;
2730
2731 if ( (mem = local_freeq) ) {
2732 unsigned int avail_free_count;
2733 unsigned int need_wakeup = 0;
2734 unsigned int need_priv_wakeup = 0;
2735
2736 lck_mtx_lock_spin(&vm_page_queue_free_lock);
2737
2738 while (mem) {
2739 int color;
2740
2741 nxt = (vm_page_t)(mem->pageq.next);
2742
2743 assert(!mem->free);
2744 assert(mem->busy);
2745 mem->free = TRUE;
2746
2747 color = mem->phys_page & vm_color_mask;
2748 queue_enter_first(&vm_page_queue_free[color],
2749 mem,
2750 vm_page_t,
2751 pageq);
2752 mem = nxt;
2753 }
2754 vm_page_free_count += pg_count;
2755 avail_free_count = vm_page_free_count;
2756
2757 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) {
2758
2759 if (avail_free_count < vm_page_free_wanted_privileged) {
2760 need_priv_wakeup = avail_free_count;
2761 vm_page_free_wanted_privileged -= avail_free_count;
2762 avail_free_count = 0;
2763 } else {
2764 need_priv_wakeup = vm_page_free_wanted_privileged;
2765 avail_free_count -= need_priv_wakeup; /* subtract before the count is zeroed */
2766 vm_page_free_wanted_privileged = 0;
2767 }
2768 }
2769 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) {
2770 unsigned int available_pages;
2771
2772 available_pages = avail_free_count - vm_page_free_reserved;
2773
2774 if (available_pages >= vm_page_free_wanted) {
2775 need_wakeup = vm_page_free_wanted;
2776 vm_page_free_wanted = 0;
2777 } else {
2778 need_wakeup = available_pages;
2779 vm_page_free_wanted -= available_pages;
2780 }
2781 }
2782 lck_mtx_unlock(&vm_page_queue_free_lock);
2783
2784 if (need_priv_wakeup != 0) {
2785 /*
2786 * There shouldn't be that many VM-privileged threads,
2787 * so let's wake them all up, even if we don't quite
2788 * have enough pages to satisfy them all.
2789 */
2790 thread_wakeup((event_t)&vm_page_free_wanted_privileged);
2791 }
2792 if (need_wakeup != 0 && vm_page_free_wanted == 0) {
2793 /*
2794 * We don't expect to have any more waiters
2795 * after this, so let's wake them all up at
2796 * once.
2797 */
2798 thread_wakeup((event_t) &vm_page_free_count);
2799 } else for (; need_wakeup != 0; need_wakeup--) {
2800 /*
2801 * Wake up one waiter per page we just released.
2802 */
2803 thread_wakeup_one((event_t) &vm_page_free_count);
2804 }
2805
2806 VM_CHECK_MEMORYSTATUS;
2807 }
2808 }
2809 }
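
/*
 * Illustrative sketch (not part of the original file): how a hypothetical
 * caller batches pages for vm_page_free_list().  Pages (already off the
 * paging queues and unwired) are singly linked through pageq.next, with
 * pageq.prev left NULL as asserted above, and the whole chain is freed in
 * one call, taking the free-list lock a few times instead of once per page.
 */
#if 0
static void
example_free_page_batch(vm_page_t *pages, int count)
{
	vm_page_t	local_freeq = VM_PAGE_NULL;
	int		i;

	for (i = 0; i < count; i++) {
		vm_page_t m = pages[i];

		m->pageq.next = (queue_entry_t) local_freeq;
		m->pageq.prev = NULL;
		local_freeq = m;
	}
	if (local_freeq != VM_PAGE_NULL)
		vm_page_free_list(local_freeq, TRUE);	/* TRUE: also remove from object/hash */
}
#endif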
2810
2811
2812 /*
2813 * vm_page_wire:
2814 *
2815 * Mark this page as wired down by yet
2816 * another map, removing it from paging queues
2817 * as necessary.
2818 *
2819 * The page's object and the page queues must be locked.
2820 */
2821 void
2822 vm_page_wire(
2823 register vm_page_t mem)
2824 {
2825
2826 // dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */
2827
2828 VM_PAGE_CHECK(mem);
2829 if (mem->object) {
2830 vm_object_lock_assert_exclusive(mem->object);
2831 } else {
2832 /*
2833 * In theory, the page should be in an object before it
2834 * gets wired, since we need to hold the object lock
2835 * to update some fields in the page structure.
2836 * However, some code (i386 pmap, for example) might want
2837 * to wire a page before it gets inserted into an object.
2838 * That's somewhat OK, as long as nobody else can get to
2839 * that page and update it at the same time.
2840 */
2841 }
2842 #if DEBUG
2843 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2844 #endif
2845 if ( !VM_PAGE_WIRED(mem)) {
2846
2847 if (mem->pageout_queue) {
2848 mem->pageout = FALSE;
2849 vm_pageout_throttle_up(mem);
2850 }
2851 VM_PAGE_QUEUES_REMOVE(mem);
2852
2853 if (mem->object) {
2854 mem->object->wired_page_count++;
2855 assert(mem->object->resident_page_count >=
2856 mem->object->wired_page_count);
2857 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2858 assert(vm_page_purgeable_count > 0);
2859 OSAddAtomic(-1, &vm_page_purgeable_count);
2860 OSAddAtomic(1, &vm_page_purgeable_wired_count);
2861 }
2862 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2863 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2864 mem->object->vo_purgeable_owner != TASK_NULL) {
2865 task_t owner;
2866
2867 owner = mem->object->vo_purgeable_owner;
2868 /* less volatile bytes */
2869 ledger_debit(owner->ledger,
2870 task_ledgers.purgeable_volatile,
2871 PAGE_SIZE);
2872 /* more not-quite-volatile bytes */
2873 ledger_credit(owner->ledger,
2874 task_ledgers.purgeable_nonvolatile,
2875 PAGE_SIZE);
2876 /* more footprint */
2877 ledger_credit(owner->ledger,
2878 task_ledgers.phys_footprint,
2879 PAGE_SIZE);
2880 }
2881 if (mem->object->all_reusable) {
2882 /*
2883 * Wired pages are not counted as "re-usable"
2884 * in "all_reusable" VM objects, so nothing
2885 * to do here.
2886 */
2887 } else if (mem->reusable) {
2888 /*
2889 * This page is not "re-usable" when it's
2890 * wired, so adjust its state and the
2891 * accounting.
2892 */
2893 vm_object_reuse_pages(mem->object,
2894 mem->offset,
2895 mem->offset+PAGE_SIZE_64,
2896 FALSE);
2897 }
2898 }
2899 assert(!mem->reusable);
2900
2901 if (!mem->private && !mem->fictitious && !mem->gobbled)
2902 vm_page_wire_count++;
2903 if (mem->gobbled)
2904 vm_page_gobble_count--;
2905 mem->gobbled = FALSE;
2906
2907 VM_CHECK_MEMORYSTATUS;
2908
2909 /*
2910 * ENCRYPTED SWAP:
2911 * The page could be encrypted, but
2912 * We don't have to decrypt it here
2913 * because we don't guarantee that the
2914 * data is actually valid at this point.
2915 * The page will get decrypted in
2916 * vm_fault_wire() if needed.
2917 */
2918 }
2919 assert(!mem->gobbled);
2920 mem->wire_count++;
2921 VM_PAGE_CHECK(mem);
2922 }
2923
2924 /*
2925 * vm_page_gobble:
2926 *
2927 * Mark this page as consumed by the vm/ipc/xmm subsystems.
2928 *
2929 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked.
2930 */
2931 void
2932 vm_page_gobble(
2933 register vm_page_t mem)
2934 {
2935 vm_page_lockspin_queues();
2936 VM_PAGE_CHECK(mem);
2937
2938 assert(!mem->gobbled);
2939 assert( !VM_PAGE_WIRED(mem));
2940
2941 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) {
2942 if (!mem->private && !mem->fictitious)
2943 vm_page_wire_count++;
2944 }
2945 vm_page_gobble_count++;
2946 mem->gobbled = TRUE;
2947 vm_page_unlock_queues();
2948 }
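
/*
 * Illustrative sketch (not part of the original file): vm_page_gobble()
 * is meant for a page that has just come from vm_page_grab() and is not
 * yet wired or inserted anywhere, with no locks held by the caller.
 */
#if 0
static vm_page_t
example_grab_and_gobble(void)
{
	vm_page_t	m;

	m = vm_page_grab();
	if (m == VM_PAGE_NULL)
		return VM_PAGE_NULL;

	vm_page_gobble(m);	/* accounts the page as consumed; takes the queues lock itself */
	return m;
}
#endif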
2949
2950 /*
2951 * vm_page_unwire:
2952 *
2953 * Release one wiring of this page, potentially
2954 * enabling it to be paged again.
2955 *
2956 * The page's object and the page queues must be locked.
2957 */
2958 void
2959 vm_page_unwire(
2960 vm_page_t mem,
2961 boolean_t queueit)
2962 {
2963
2964 // dbgLog(current_thread(), mem->offset, mem->object, 0); /* (TEST/DEBUG) */
2965
2966 VM_PAGE_CHECK(mem);
2967 assert(VM_PAGE_WIRED(mem));
2968 assert(mem->object != VM_OBJECT_NULL);
2969 #if DEBUG
2970 vm_object_lock_assert_exclusive(mem->object);
2971 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
2972 #endif
2973 if (--mem->wire_count == 0) {
2974 assert(!mem->private && !mem->fictitious);
2975 vm_page_wire_count--;
2976 assert(mem->object->wired_page_count > 0);
2977 mem->object->wired_page_count--;
2978 assert(mem->object->resident_page_count >=
2979 mem->object->wired_page_count);
2980 if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
2981 OSAddAtomic(+1, &vm_page_purgeable_count);
2982 assert(vm_page_purgeable_wired_count > 0);
2983 OSAddAtomic(-1, &vm_page_purgeable_wired_count);
2984 }
2985 if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
2986 mem->object->purgable == VM_PURGABLE_EMPTY) &&
2987 mem->object->vo_purgeable_owner != TASK_NULL) {
2988 task_t owner;
2989
2990 owner = mem->object->vo_purgeable_owner;
2991 /* more volatile bytes */
2992 ledger_credit(owner->ledger,
2993 task_ledgers.purgeable_volatile,
2994 PAGE_SIZE);
2995 /* less not-quite-volatile bytes */
2996 ledger_debit(owner->ledger,
2997 task_ledgers.purgeable_nonvolatile,
2998 PAGE_SIZE);
2999 /* less footprint */
3000 ledger_debit(owner->ledger,
3001 task_ledgers.phys_footprint,
3002 PAGE_SIZE);
3003 }
3004 assert(mem->object != kernel_object);
3005 assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
3006
3007 if (queueit == TRUE) {
3008 if (mem->object->purgable == VM_PURGABLE_EMPTY) {
3009 vm_page_deactivate(mem);
3010 } else {
3011 vm_page_activate(mem);
3012 }
3013 }
3014
3015 VM_CHECK_MEMORYSTATUS;
3016
3017 }
3018 VM_PAGE_CHECK(mem);
3019 }
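
/*
 * Illustrative sketch (not part of the original file): the locking that
 * vm_page_wire()/vm_page_unwire() expect.  Both the page's object lock
 * (exclusive) and the page queues lock must be held; the queueit argument
 * to vm_page_unwire() asks for the page to go back on a paging queue once
 * its last wiring is released.  Exact caller details are hypothetical.
 */
#if 0
static void
example_wire_then_unwire(vm_page_t m)
{
	vm_object_lock(m->object);
	vm_page_lockspin_queues();

	vm_page_wire(m);		/* wire_count 0 -> 1 pulls it off the paging queues */
	/* ... the page is now pinned for I/O or a wired mapping ... */
	vm_page_unwire(m, TRUE);	/* last unwire re-queues it (activate/deactivate) */

	vm_page_unlock_queues();
	vm_object_unlock(m->object);
}
#endif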
3020
3021 /*
3022 * vm_page_deactivate:
3023 *
3024 * Returns the given page to the inactive list,
3025 * indicating that no physical maps have access
3026 * to this page. [Used by the physical mapping system.]
3027 *
3028 * The page queues must be locked.
3029 */
3030 void
3031 vm_page_deactivate(
3032 vm_page_t m)
3033 {
3034 vm_page_deactivate_internal(m, TRUE);
3035 }
3036
3037
3038 void
3039 vm_page_deactivate_internal(
3040 vm_page_t m,
3041 boolean_t clear_hw_reference)
3042 {
3043
3044 VM_PAGE_CHECK(m);
3045 assert(m->object != kernel_object);
3046 assert(m->phys_page != vm_page_guard_addr);
3047
3048 // dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6); /* (TEST/DEBUG) */
3049 #if DEBUG
3050 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3051 #endif
3052 /*
3053 * This page is no longer very interesting. If it was
3054 * interesting (active or inactive/referenced), then we
3055 * clear the reference bit and (re)enter it in the
3056 * inactive queue. Note wired pages should not have
3057 * their reference bit cleared.
3058 */
3059 assert ( !(m->absent && !m->unusual));
3060
3061 if (m->gobbled) { /* can this happen? */
3062 assert( !VM_PAGE_WIRED(m));
3063
3064 if (!m->private && !m->fictitious)
3065 vm_page_wire_count--;
3066 vm_page_gobble_count--;
3067 m->gobbled = FALSE;
3068 }
3069 /*
3070 * if this page is currently on the pageout queue, we can't do the
3071 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3072 * and we can't remove it manually since we would need the object lock
3073 * (which is not required here) to decrement the activity_in_progress
3074 * reference which is held on the object while the page is in the pageout queue...
3075 * just let the normal laundry processing proceed
3076 */
3077 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
3078 return;
3079
3080 if (!m->absent && clear_hw_reference == TRUE)
3081 pmap_clear_reference(m->phys_page);
3082
3083 m->reference = FALSE;
3084 m->no_cache = FALSE;
3085
3086 if (!m->inactive) {
3087 VM_PAGE_QUEUES_REMOVE(m);
3088
3089 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3090 m->dirty && m->object->internal &&
3091 (m->object->purgable == VM_PURGABLE_DENY ||
3092 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3093 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3094 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3095 m->throttled = TRUE;
3096 vm_page_throttled_count++;
3097 } else {
3098 if (m->object->named && m->object->ref_count == 1) {
3099 vm_page_speculate(m, FALSE);
3100 #if DEVELOPMENT || DEBUG
3101 vm_page_speculative_recreated++;
3102 #endif
3103 } else {
3104 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3105 }
3106 }
3107 }
3108 }
3109
3110 /*
3111 * vm_page_enqueue_cleaned
3112 *
3113 * Put the page on the cleaned queue, mark it cleaned, etc.
3114 * Being on the cleaned queue (and having m->clean_queue set)
3115 * does ** NOT ** guarantee that the page is clean!
3116 *
3117 * Call with the queues lock held.
3118 */
3119
3120 void vm_page_enqueue_cleaned(vm_page_t m)
3121 {
3122 assert(m->phys_page != vm_page_guard_addr);
3123 #if DEBUG
3124 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3125 #endif
3126 assert( !(m->absent && !m->unusual));
3127
3128 if (m->gobbled) {
3129 assert( !VM_PAGE_WIRED(m));
3130 if (!m->private && !m->fictitious)
3131 vm_page_wire_count--;
3132 vm_page_gobble_count--;
3133 m->gobbled = FALSE;
3134 }
3135 /*
3136 * if this page is currently on the pageout queue, we can't do the
3137 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3138 * and we can't remove it manually since we would need the object lock
3139 * (which is not required here) to decrement the activity_in_progress
3140 * reference which is held on the object while the page is in the pageout queue...
3141 * just let the normal laundry processing proceed
3142 */
3143 if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
3144 return;
3145
3146 VM_PAGE_QUEUES_REMOVE(m);
3147
3148 queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
3149 m->clean_queue = TRUE;
3150 vm_page_cleaned_count++;
3151
3152 m->inactive = TRUE;
3153 vm_page_inactive_count++;
3154 if (m->object->internal) {
3155 vm_page_pageable_internal_count++;
3156 } else {
3157 vm_page_pageable_external_count++;
3158 }
3159
3160 vm_pageout_enqueued_cleaned++;
3161 }
3162
3163 /*
3164 * vm_page_activate:
3165 *
3166 * Put the specified page on the active list (if appropriate).
3167 *
3168 * The page queues must be locked.
3169 */
3170
3171 void
3172 vm_page_activate(
3173 register vm_page_t m)
3174 {
3175 VM_PAGE_CHECK(m);
3176 #ifdef FIXME_4778297
3177 assert(m->object != kernel_object);
3178 #endif
3179 assert(m->phys_page != vm_page_guard_addr);
3180 #if DEBUG
3181 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3182 #endif
3183 assert( !(m->absent && !m->unusual));
3184
3185 if (m->gobbled) {
3186 assert( !VM_PAGE_WIRED(m));
3187 if (!m->private && !m->fictitious)
3188 vm_page_wire_count--;
3189 vm_page_gobble_count--;
3190 m->gobbled = FALSE;
3191 }
3192 /*
3193 * if this page is currently on the pageout queue, we can't do the
3194 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3195 * and we can't remove it manually since we would need the object lock
3196 * (which is not required here) to decrement the activity_in_progress
3197 * reference which is held on the object while the page is in the pageout queue...
3198 * just let the normal laundry processing proceed
3199 */
3200 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3201 return;
3202
3203 #if DEBUG
3204 if (m->active)
3205 panic("vm_page_activate: already active");
3206 #endif
3207
3208 if (m->speculative) {
3209 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
3210 DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
3211 }
3212
3213 VM_PAGE_QUEUES_REMOVE(m);
3214
3215 if ( !VM_PAGE_WIRED(m)) {
3216
3217 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
3218 m->dirty && m->object->internal &&
3219 (m->object->purgable == VM_PURGABLE_DENY ||
3220 m->object->purgable == VM_PURGABLE_NONVOLATILE ||
3221 m->object->purgable == VM_PURGABLE_VOLATILE)) {
3222 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
3223 m->throttled = TRUE;
3224 vm_page_throttled_count++;
3225 } else {
3226 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
3227 m->active = TRUE;
3228 vm_page_active_count++;
3229 if (m->object->internal) {
3230 vm_page_pageable_internal_count++;
3231 } else {
3232 vm_page_pageable_external_count++;
3233 }
3234 }
3235 m->reference = TRUE;
3236 m->no_cache = FALSE;
3237 }
3238 VM_PAGE_CHECK(m);
3239 }
3240
3241
3242 /*
3243 * vm_page_speculate:
3244 *
3245 * Put the specified page on the speculative list (if appropriate).
3246 *
3247 * The page queues must be locked.
3248 */
3249 void
3250 vm_page_speculate(
3251 vm_page_t m,
3252 boolean_t new)
3253 {
3254 struct vm_speculative_age_q *aq;
3255
3256 VM_PAGE_CHECK(m);
3257 assert(m->object != kernel_object);
3258 assert(m->phys_page != vm_page_guard_addr);
3259 #if DEBUG
3260 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3261 #endif
3262 assert( !(m->absent && !m->unusual));
3263
3264 /*
3265 * if this page is currently on the pageout queue, we can't do the
3266 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3267 * and we can't remove it manually since we would need the object lock
3268 * (which is not required here) to decrement the activity_in_progress
3269 * reference which is held on the object while the page is in the pageout queue...
3270 * just let the normal laundry processing proceed
3271 */
3272 if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
3273 return;
3274
3275 VM_PAGE_QUEUES_REMOVE(m);
3276
3277 if ( !VM_PAGE_WIRED(m)) {
3278 mach_timespec_t ts;
3279 clock_sec_t sec;
3280 clock_nsec_t nsec;
3281
3282 clock_get_system_nanotime(&sec, &nsec);
3283 ts.tv_sec = (unsigned int) sec;
3284 ts.tv_nsec = nsec;
3285
3286 if (vm_page_speculative_count == 0) {
3287
3288 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3289 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3290
3291 aq = &vm_page_queue_speculative[speculative_age_index];
3292
3293 /*
3294 * set the timer to begin a new group
3295 */
3296 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3297 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3298
3299 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3300 } else {
3301 aq = &vm_page_queue_speculative[speculative_age_index];
3302
3303 if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {
3304
3305 speculative_age_index++;
3306
3307 if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3308 speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3309 if (speculative_age_index == speculative_steal_index) {
3310 speculative_steal_index = speculative_age_index + 1;
3311
3312 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
3313 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
3314 }
3315 aq = &vm_page_queue_speculative[speculative_age_index];
3316
3317 if (!queue_empty(&aq->age_q))
3318 vm_page_speculate_ageit(aq);
3319
3320 aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
3321 aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;
3322
3323 ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
3324 }
3325 }
3326 enqueue_tail(&aq->age_q, &m->pageq);
3327 m->speculative = TRUE;
3328 vm_page_speculative_count++;
3329 if (m->object->internal) {
3330 vm_page_pageable_internal_count++;
3331 } else {
3332 vm_page_pageable_external_count++;
3333 }
3334
3335 if (new == TRUE) {
3336 vm_object_lock_assert_exclusive(m->object);
3337
3338 m->object->pages_created++;
3339 #if DEVELOPMENT || DEBUG
3340 vm_page_speculative_created++;
3341 #endif
3342 }
3343 }
3344 VM_PAGE_CHECK(m);
3345 }
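
/*
 * Illustrative note (not part of the original file): the aging timestamp
 * arithmetic above splits vm_page_speculative_q_age_ms into seconds and
 * nanoseconds.  For example, with a hypothetical value of 1500 ms:
 *
 *	age_ts.tv_sec  = 1500 / 1000                          = 1
 *	age_ts.tv_nsec = (1500 % 1000) * 1000 * NSEC_PER_USEC = 500,000,000
 *
 * i.e. pages land in the current aging bin until "now + 1.5s", after
 * which vm_page_speculate() advances speculative_age_index to a new bin.
 */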
3346
3347
3348 /*
3349 * move pages from the specified aging bin to
3350 * the speculative bin that pageout_scan claims from
3351 *
3352 * The page queues must be locked.
3353 */
3354 void
3355 vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
3356 {
3357 struct vm_speculative_age_q *sq;
3358 vm_page_t t;
3359
3360 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
3361
3362 if (queue_empty(&sq->age_q)) {
3363 sq->age_q.next = aq->age_q.next;
3364 sq->age_q.prev = aq->age_q.prev;
3365
3366 t = (vm_page_t)sq->age_q.next;
3367 t->pageq.prev = &sq->age_q;
3368
3369 t = (vm_page_t)sq->age_q.prev;
3370 t->pageq.next = &sq->age_q;
3371 } else {
3372 t = (vm_page_t)sq->age_q.prev;
3373 t->pageq.next = aq->age_q.next;
3374
3375 t = (vm_page_t)aq->age_q.next;
3376 t->pageq.prev = sq->age_q.prev;
3377
3378 t = (vm_page_t)aq->age_q.prev;
3379 t->pageq.next = &sq->age_q;
3380
3381 sq->age_q.prev = aq->age_q.prev;
3382 }
3383 queue_init(&aq->age_q);
3384 }
3385
3386
3387 void
3388 vm_page_lru(
3389 vm_page_t m)
3390 {
3391 VM_PAGE_CHECK(m);
3392 assert(m->object != kernel_object);
3393 assert(m->phys_page != vm_page_guard_addr);
3394
3395 #if DEBUG
3396 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3397 #endif
3398 /*
3399 * if this page is currently on the pageout queue, we can't do the
3400 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
3401 * and we can't remove it manually since we would need the object lock
3402 * (which is not required here) to decrement the activity_in_progress
3403 * reference which is held on the object while the page is in the pageout queue...
3404 * just let the normal laundry processing proceed
3405 */
3406 if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
3407 return;
3408
3409 m->no_cache = FALSE;
3410
3411 VM_PAGE_QUEUES_REMOVE(m);
3412
3413 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
3414 }
3415
3416
3417 void
3418 vm_page_reactivate_all_throttled(void)
3419 {
3420 vm_page_t first_throttled, last_throttled;
3421 vm_page_t first_active;
3422 vm_page_t m;
3423 int extra_active_count;
3424 int extra_internal_count, extra_external_count;
3425
3426 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
3427 return;
3428
3429 extra_active_count = 0;
3430 extra_internal_count = 0;
3431 extra_external_count = 0;
3432 vm_page_lock_queues();
3433 if (! queue_empty(&vm_page_queue_throttled)) {
3434 /*
3435 * Switch "throttled" pages to "active".
3436 */
3437 queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
3438 VM_PAGE_CHECK(m);
3439 assert(m->throttled);
3440 assert(!m->active);
3441 assert(!m->inactive);
3442 assert(!m->speculative);
3443 assert(!VM_PAGE_WIRED(m));
3444
3445 extra_active_count++;
3446 if (m->object->internal) {
3447 extra_internal_count++;
3448 } else {
3449 extra_external_count++;
3450 }
3451
3452 m->throttled = FALSE;
3453 m->active = TRUE;
3454 VM_PAGE_CHECK(m);
3455 }
3456
3457 /*
3458 * Transfer the entire throttled queue to the regular LRU page queues.
3459 * We insert it at the head of the active queue, so that these pages
3460 * get re-evaluated by the LRU algorithm first, since they've been
3461 * completely out of it until now.
3462 */
3463 first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
3464 last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
3465 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3466 if (queue_empty(&vm_page_queue_active)) {
3467 queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
3468 } else {
3469 queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
3470 }
3471 queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
3472 queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
3473 queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;
3474
3475 #if DEBUG
3476 printf("reactivated %d throttled pages\n", vm_page_throttled_count);
3477 #endif
3478 queue_init(&vm_page_queue_throttled);
3479 /*
3480 * Adjust the global page counts.
3481 */
3482 vm_page_active_count += extra_active_count;
3483 vm_page_pageable_internal_count += extra_internal_count;
3484 vm_page_pageable_external_count += extra_external_count;
3485 vm_page_throttled_count = 0;
3486 }
3487 assert(vm_page_throttled_count == 0);
3488 assert(queue_empty(&vm_page_queue_throttled));
3489 vm_page_unlock_queues();
3490 }
3491
3492
3493 /*
3494 * move pages from the indicated local queue to the global active queue.
3495 * it's ok to fail if we're below the hard limit and force == FALSE;
3496 * the nolocks == TRUE case is to allow this function to be run on
3497 * the hibernate path.
3498 */
3499
3500 void
3501 vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
3502 {
3503 struct vpl *lq;
3504 vm_page_t first_local, last_local;
3505 vm_page_t first_active;
3506 vm_page_t m;
3507 uint32_t count = 0;
3508
3509 if (vm_page_local_q == NULL)
3510 return;
3511
3512 lq = &vm_page_local_q[lid].vpl_un.vpl;
3513
3514 if (nolocks == FALSE) {
3515 if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
3516 if ( !vm_page_trylockspin_queues())
3517 return;
3518 } else
3519 vm_page_lockspin_queues();
3520
3521 VPL_LOCK(&lq->vpl_lock);
3522 }
3523 if (lq->vpl_count) {
3524 /*
3525 * Switch "local" pages to "active".
3526 */
3527 assert(!queue_empty(&lq->vpl_queue));
3528
3529 queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
3530 VM_PAGE_CHECK(m);
3531 assert(m->local);
3532 assert(!m->active);
3533 assert(!m->inactive);
3534 assert(!m->speculative);
3535 assert(!VM_PAGE_WIRED(m));
3536 assert(!m->throttled);
3537 assert(!m->fictitious);
3538
3539 if (m->local_id != lid)
3540 panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);
3541
3542 m->local_id = 0;
3543 m->local = FALSE;
3544 m->active = TRUE;
3545 VM_PAGE_CHECK(m);
3546
3547 count++;
3548 }
3549 if (count != lq->vpl_count)
3550 panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);
3551
3552 /*
3553 * Transfer the entire local queue to the regular LRU page queues.
3554 */
3555 first_local = (vm_page_t) queue_first(&lq->vpl_queue);
3556 last_local = (vm_page_t) queue_last(&lq->vpl_queue);
3557 first_active = (vm_page_t) queue_first(&vm_page_queue_active);
3558
3559 if (queue_empty(&vm_page_queue_active)) {
3560 queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
3561 } else {
3562 queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
3563 }
3564 queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
3565 queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
3566 queue_next(&last_local->pageq) = (queue_entry_t) first_active;
3567
3568 queue_init(&lq->vpl_queue);
3569 /*
3570 * Adjust the global page counts.
3571 */
3572 vm_page_active_count += lq->vpl_count;
3573 vm_page_pageable_internal_count += lq->vpl_internal_count;
3574 vm_page_pageable_external_count += lq->vpl_external_count;
3575 lq->vpl_count = 0;
3576 lq->vpl_internal_count = 0;
3577 lq->vpl_external_count = 0;
3578 }
3579 assert(queue_empty(&lq->vpl_queue));
3580
3581 if (nolocks == FALSE) {
3582 VPL_UNLOCK(&lq->vpl_lock);
3583 vm_page_unlock_queues();
3584 }
3585 }
3586
3587 /*
3588 * vm_page_part_zero_fill:
3589 *
3590 * Zero-fill a part of the page.
3591 */
3592 #define PMAP_ZERO_PART_PAGE_IMPLEMENTED
3593 void
3594 vm_page_part_zero_fill(
3595 vm_page_t m,
3596 vm_offset_t m_pa,
3597 vm_size_t len)
3598 {
3599
3600 #if 0
3601 /*
3602 * we don't hold the page queue lock
3603 * so this check isn't safe to make
3604 */
3605 VM_PAGE_CHECK(m);
3606 #endif
3607
3608 #ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
3609 pmap_zero_part_page(m->phys_page, m_pa, len);
3610 #else
3611 vm_page_t tmp;
3612 while (1) {
3613 tmp = vm_page_grab();
3614 if (tmp == VM_PAGE_NULL) {
3615 vm_page_wait(THREAD_UNINT);
3616 continue;
3617 }
3618 break;
3619 }
3620 vm_page_zero_fill(tmp);
3621 if(m_pa != 0) {
3622 vm_page_part_copy(m, 0, tmp, 0, m_pa);
3623 }
3624 if((m_pa + len) < PAGE_SIZE) {
3625 vm_page_part_copy(m, m_pa + len, tmp,
3626 m_pa + len, PAGE_SIZE - (m_pa + len));
3627 }
3628 vm_page_copy(tmp,m);
3629 VM_PAGE_FREE(tmp);
3630 #endif
3631
3632 }
3633
3634 /*
3635 * vm_page_zero_fill:
3636 *
3637 * Zero-fill the specified page.
3638 */
3639 void
3640 vm_page_zero_fill(
3641 vm_page_t m)
3642 {
3643 XPR(XPR_VM_PAGE,
3644 "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
3645 m->object, m->offset, m, 0,0);
3646 #if 0
3647 /*
3648 * we don't hold the page queue lock
3649 * so this check isn't safe to make
3650 */
3651 VM_PAGE_CHECK(m);
3652 #endif
3653
3654 // dbgTrace(0xAEAEAEAE, m->phys_page, 0); /* (BRINGUP) */
3655 pmap_zero_page(m->phys_page);
3656 }
3657
3658 /*
3659 * vm_page_part_copy:
3660 *
3661 * copy part of one page to another
3662 */
3663
3664 void
3665 vm_page_part_copy(
3666 vm_page_t src_m,
3667 vm_offset_t src_pa,
3668 vm_page_t dst_m,
3669 vm_offset_t dst_pa,
3670 vm_size_t len)
3671 {
3672 #if 0
3673 /*
3674 * we don't hold the page queue lock
3675 * so this check isn't safe to make
3676 */
3677 VM_PAGE_CHECK(src_m);
3678 VM_PAGE_CHECK(dst_m);
3679 #endif
3680 pmap_copy_part_page(src_m->phys_page, src_pa,
3681 dst_m->phys_page, dst_pa, len);
3682 }
3683
3684 /*
3685 * vm_page_copy:
3686 *
3687 * Copy one page to another
3688 *
3689 * ENCRYPTED SWAP:
3690 * The source page should not be encrypted. The caller should
3691 * make sure the page is decrypted first, if necessary.
3692 */
3693
3694 int vm_page_copy_cs_validations = 0;
3695 int vm_page_copy_cs_tainted = 0;
3696
3697 void
3698 vm_page_copy(
3699 vm_page_t src_m,
3700 vm_page_t dest_m)
3701 {
3702 XPR(XPR_VM_PAGE,
3703 "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
3704 src_m->object, src_m->offset,
3705 dest_m->object, dest_m->offset,
3706 0);
3707 #if 0
3708 /*
3709 * we don't hold the page queue lock
3710 * so this check isn't safe to make
3711 */
3712 VM_PAGE_CHECK(src_m);
3713 VM_PAGE_CHECK(dest_m);
3714 #endif
3715 vm_object_lock_assert_held(src_m->object);
3716
3717 /*
3718 * ENCRYPTED SWAP:
3719 * The source page should not be encrypted at this point.
3720 * The destination page will therefore not contain encrypted
3721 * data after the copy.
3722 */
3723 if (src_m->encrypted) {
3724 panic("vm_page_copy: source page %p is encrypted\n", src_m);
3725 }
3726 dest_m->encrypted = FALSE;
3727
3728 if (src_m->object != VM_OBJECT_NULL &&
3729 src_m->object->code_signed) {
3730 /*
3731 * We're copying a page from a code-signed object.
3732 * Whoever ends up mapping the copy page might care about
3733 * the original page's integrity, so let's validate the
3734 * source page now.
3735 */
3736 vm_page_copy_cs_validations++;
3737 vm_page_validate_cs(src_m);
3738 }
3739
3740 if (vm_page_is_slideable(src_m)) {
3741 boolean_t was_busy = src_m->busy;
3742 src_m->busy = TRUE;
3743 (void) vm_page_slide(src_m, 0);
3744 assert(src_m->busy);
3745 if (!was_busy) {
3746 PAGE_WAKEUP_DONE(src_m);
3747 }
3748 }
3749
3750 /*
3751 * Propagate the cs_tainted bit to the copy page. Do not propagate
3752 * the cs_validated bit.
3753 */
3754 dest_m->cs_tainted = src_m->cs_tainted;
3755 if (dest_m->cs_tainted) {
3756 vm_page_copy_cs_tainted++;
3757 }
3758 dest_m->slid = src_m->slid;
3759 dest_m->error = src_m->error; /* sliding src_m might have failed... */
3760 pmap_copy_page(src_m->phys_page, dest_m->phys_page);
3761 }
3762
3763 #if MACH_ASSERT
3764 static void
3765 _vm_page_print(
3766 vm_page_t p)
3767 {
3768 printf("vm_page %p: \n", p);
3769 printf(" pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
3770 printf(" listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
3771 printf(" next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
3772 printf(" object=%p offset=0x%llx\n", p->object, p->offset);
3773 printf(" wire_count=%u\n", p->wire_count);
3774
3775 printf(" %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
3776 (p->local ? "" : "!"),
3777 (p->inactive ? "" : "!"),
3778 (p->active ? "" : "!"),
3779 (p->pageout_queue ? "" : "!"),
3780 (p->speculative ? "" : "!"),
3781 (p->laundry ? "" : "!"));
3782 printf(" %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
3783 (p->free ? "" : "!"),
3784 (p->reference ? "" : "!"),
3785 (p->gobbled ? "" : "!"),
3786 (p->private ? "" : "!"),
3787 (p->throttled ? "" : "!"));
3788 printf(" %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
3789 (p->busy ? "" : "!"),
3790 (p->wanted ? "" : "!"),
3791 (p->tabled ? "" : "!"),
3792 (p->fictitious ? "" : "!"),
3793 (p->pmapped ? "" : "!"),
3794 (p->wpmapped ? "" : "!"));
3795 printf(" %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
3796 (p->pageout ? "" : "!"),
3797 (p->absent ? "" : "!"),
3798 (p->error ? "" : "!"),
3799 (p->dirty ? "" : "!"),
3800 (p->cleaning ? "" : "!"),
3801 (p->precious ? "" : "!"),
3802 (p->clustered ? "" : "!"));
3803 printf(" %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
3804 (p->overwriting ? "" : "!"),
3805 (p->restart ? "" : "!"),
3806 (p->unusual ? "" : "!"),
3807 (p->encrypted ? "" : "!"),
3808 (p->encrypted_cleaning ? "" : "!"));
3809 printf(" %scs_validated, %scs_tainted, %sno_cache\n",
3810 (p->cs_validated ? "" : "!"),
3811 (p->cs_tainted ? "" : "!"),
3812 (p->no_cache ? "" : "!"));
3813
3814 printf("phys_page=0x%x\n", p->phys_page);
3815 }
3816
3817 /*
3818 * Check that the list of pages is ordered by
3819 * ascending physical address and has no holes.
3820 */
3821 static int
3822 vm_page_verify_contiguous(
3823 vm_page_t pages,
3824 unsigned int npages)
3825 {
3826 register vm_page_t m;
3827 unsigned int page_count;
3828 vm_offset_t prev_addr;
3829
3830 prev_addr = pages->phys_page;
3831 page_count = 1;
3832 for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
3833 if (m->phys_page != prev_addr + 1) {
3834 printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
3835 m, (long)prev_addr, m->phys_page);
3836 printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
3837 panic("vm_page_verify_contiguous: not contiguous!");
3838 }
3839 prev_addr = m->phys_page;
3840 ++page_count;
3841 }
3842 if (page_count != npages) {
3843 printf("pages %p actual count 0x%x but requested 0x%x\n",
3844 pages, page_count, npages);
3845 panic("vm_page_verify_contiguous: count error");
3846 }
3847 return 1;
3848 }
3849
3850
3851 /*
3852 * Check the free lists for proper length etc.
3853 */
3854 static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
3855 static unsigned int
3856 vm_page_verify_free_list(
3857 queue_head_t *vm_page_queue,
3858 unsigned int color,
3859 vm_page_t look_for_page,
3860 boolean_t expect_page)
3861 {
3862 unsigned int npages;
3863 vm_page_t m;
3864 vm_page_t prev_m;
3865 boolean_t found_page;
3866
3867 if (! vm_page_verify_this_free_list_enabled)
3868 return 0;
3869
3870 found_page = FALSE;
3871 npages = 0;
3872 prev_m = (vm_page_t) vm_page_queue;
3873 queue_iterate(vm_page_queue,
3874 m,
3875 vm_page_t,
3876 pageq) {
3877
3878 if (m == look_for_page) {
3879 found_page = TRUE;
3880 }
3881 if ((vm_page_t) m->pageq.prev != prev_m)
3882 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
3883 color, npages, m, m->pageq.prev, prev_m);
3884 if ( ! m->busy )
3885 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
3886 color, npages, m);
3887 if (color != (unsigned int) -1) {
3888 if ((m->phys_page & vm_color_mask) != color)
3889 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
3890 color, npages, m, m->phys_page & vm_color_mask, color);
3891 if ( ! m->free )
3892 panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
3893 color, npages, m);
3894 }
3895 ++npages;
3896 prev_m = m;
3897 }
3898 if (look_for_page != VM_PAGE_NULL) {
3899 unsigned int other_color;
3900
3901 if (expect_page && !found_page) {
3902 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
3903 color, npages, look_for_page, look_for_page->phys_page);
3904 _vm_page_print(look_for_page);
3905 for (other_color = 0;
3906 other_color < vm_colors;
3907 other_color++) {
3908 if (other_color == color)
3909 continue;
3910 vm_page_verify_free_list(&vm_page_queue_free[other_color],
3911 other_color, look_for_page, FALSE);
3912 }
3913 if (color == (unsigned int) -1) {
3914 vm_page_verify_free_list(&vm_lopage_queue_free,
3915 (unsigned int) -1, look_for_page, FALSE);
3916 }
3917 panic("vm_page_verify_free_list(color=%u)\n", color);
3918 }
3919 if (!expect_page && found_page) {
3920 printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
3921 color, npages, look_for_page, look_for_page->phys_page);
3922 }
3923 }
3924 return npages;
3925 }
3926
3927 static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
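/*
 * Walk every color of vm_page_queue_free plus the lopage free queue,
 * verify each list, and check that the page counts found match
 * vm_page_free_count and vm_lopage_free_count.
 */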
3928 static void
3929 vm_page_verify_free_lists( void )
3930 {
3931 unsigned int color, npages, nlopages;
3932 boolean_t toggle = TRUE;
3933
3934 if (! vm_page_verify_all_free_lists_enabled)
3935 return;
3936
3937 npages = 0;
3938
3939 lck_mtx_lock(&vm_page_queue_free_lock);
3940
3941 if (vm_page_verify_this_free_list_enabled == TRUE) {
3942 /*
3943 * This variable has been set globally for extra checking of
3944 * each free list Q. Since we didn't set it, we don't own it
3945 * and we shouldn't toggle it.
3946 */
3947 toggle = FALSE;
3948 }
3949
3950 if (toggle == TRUE) {
3951 vm_page_verify_this_free_list_enabled = TRUE;
3952 }
3953
3954 for( color = 0; color < vm_colors; color++ ) {
3955 npages += vm_page_verify_free_list(&vm_page_queue_free[color],
3956 color, VM_PAGE_NULL, FALSE);
3957 }
3958 nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
3959 (unsigned int) -1,
3960 VM_PAGE_NULL, FALSE);
3961 if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
3962 panic("vm_page_verify_free_lists: "
3963 "npages %u free_count %d nlopages %u lo_free_count %u",
3964 npages, vm_page_free_count, nlopages, vm_lopage_free_count);
3965
3966 if (toggle == TRUE) {
3967 vm_page_verify_this_free_list_enabled = FALSE;
3968 }
3969
3970 lck_mtx_unlock(&vm_page_queue_free_lock);
3971 }
3972
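/*
 * With the page queues lock held, check that 'mem' is on no more than
 * 'val' of the page queues and that a wired page is on none of them.
 */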
3973 void
3974 vm_page_queues_assert(
3975 vm_page_t mem,
3976 int val)
3977 {
3978 #if DEBUG
3979 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
3980 #endif
3981 if (mem->free + mem->active + mem->inactive + mem->speculative +
3982 mem->throttled + mem->pageout_queue > (val)) {
3983 _vm_page_print(mem);
3984 panic("vm_page_queues_assert(%p, %d)\n", mem, val);
3985 }
3986 if (VM_PAGE_WIRED(mem)) {
3987 assert(!mem->active);
3988 assert(!mem->inactive);
3989 assert(!mem->speculative);
3990 assert(!mem->throttled);
3991 assert(!mem->pageout_queue);
3992 }
3993 }
3994 #endif /* MACH_ASSERT */
3995
3996
3997 /*
3998 * CONTIGUOUS PAGE ALLOCATION
3999 *
4000 * Find a region large enough to contain at least n pages
4001 * of contiguous physical memory.
4002 *
4003 * This is done by traversing the vm_page_t array in a linear fashion
4004 * we assume that the vm_page_t array has the available physical pages in an
4005 * ordered, ascending list... this is currently true of all our implementations
4006 * and must remain so... there can be 'holes' in the array... we also can
4007 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
4008 * which used to happen via 'vm_page_convert'... that function was no longer
4009 * being called and was removed...
4010 *
4011 * The basic flow consists of stabilizing some of the interesting state of
4012 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
4013 * sweep at the beginning of the array looking for pages that meet our criteria
4014 * for a 'stealable' page... currently we are pretty conservative... if the page
4015 * meets this criteria and is physically contiguous to the previous page in the 'run'
4016 * we keep developing it. If we hit a page that doesn't fit, we reset our state
4017 * and start to develop a new run... if at this point we've already considered
4018 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
4019 * and mutex_pause (which will yield the processor), to keep the latency low w/r
4020 * to other threads trying to acquire free pages (or move pages from q to q),
4021 * and then continue from the spot we left off... we only make 1 pass through the
4022 * array. Once we have a 'run' that is long enough, we'll go into the loop
4023 * which steals the pages from the queues they're currently on... pages on the free
4024 * queue can be stolen directly... pages that are on any of the other queues
4025 * must be removed from the object they are tabled on... this requires taking the
4026 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
4027 * or if the state of the page behind the vm_object lock is no longer viable, we'll
4028 * dump the pages we've currently stolen back to the free list, and pick up our
4029 * scan from the point where we aborted the 'current' run.
4030 *
4031 *
4032 * Requirements:
4033 * - neither vm_page_queue nor vm_free_list lock can be held on entry
4034 *
4035 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
4036 *
4037 * Algorithm:
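 * (in outline, per the description above)
 *  1) take the vm_page_queue and vm_page_free locks
 *  2) scan vm_pages[] for a physically contiguous run of 'stealable'
 *     pages, dropping both locks and calling mutex_pause every
 *     MAX_CONSIDERED_BEFORE_YIELD pages considered
 *  3) once the run is long enough, pull its free pages off the free
 *     queues, then steal or substitute the in-use pages (object lock
 *     permitting), dumping the run and resuming the scan if a page's
 *     state has changed underneath us
 *  4) wire (or gobble) the resulting page list and return it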
4038 */
4039
4040 #define MAX_CONSIDERED_BEFORE_YIELD 1000
4041
4042
4043 #define RESET_STATE_OF_RUN() \
4044 MACRO_BEGIN \
4045 prevcontaddr = -2; \
4046 start_pnum = -1; \
4047 free_considered = 0; \
4048 substitute_needed = 0; \
4049 npages = 0; \
4050 MACRO_END
4051
4052 /*
4053 * Can we steal in-use (i.e. not free) pages when searching for
4054 * physically-contiguous pages ?
4055 */
4056 #define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1
4057
4058 static unsigned int vm_page_find_contiguous_last_idx = 0, vm_page_lomem_find_contiguous_last_idx = 0;
4059 #if DEBUG
4060 int vm_page_find_contig_debug = 0;
4061 #endif
4062
4063 static vm_page_t
4064 vm_page_find_contiguous(
4065 unsigned int contig_pages,
4066 ppnum_t max_pnum,
4067 ppnum_t pnum_mask,
4068 boolean_t wire,
4069 int flags)
4070 {
4071 vm_page_t m = NULL;
4072 ppnum_t prevcontaddr;
4073 ppnum_t start_pnum;
4074 unsigned int npages, considered, scanned;
4075 unsigned int page_idx, start_idx, last_idx, orig_last_idx;
4076 unsigned int idx_last_contig_page_found = 0;
4077 int free_considered, free_available;
4078 int substitute_needed;
4079 boolean_t wrapped;
4080 #if DEBUG
4081 clock_sec_t tv_start_sec, tv_end_sec;
4082 clock_usec_t tv_start_usec, tv_end_usec;
4083 #endif
4084 #if MACH_ASSERT
4085 int yielded = 0;
4086 int dumped_run = 0;
4087 int stolen_pages = 0;
4088 int compressed_pages = 0;
4089 #endif
4090
4091 if (contig_pages == 0)
4092 return VM_PAGE_NULL;
4093
4094 #if MACH_ASSERT
4095 vm_page_verify_free_lists();
4096 #endif
4097 #if DEBUG
4098 clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
4099 #endif
4100 PAGE_REPLACEMENT_ALLOWED(TRUE);
4101
4102 vm_page_lock_queues();
4103 lck_mtx_lock(&vm_page_queue_free_lock);
4104
4105 RESET_STATE_OF_RUN();
4106
4107 scanned = 0;
4108 considered = 0;
4109 free_available = vm_page_free_count - vm_page_free_reserved;
4110
4111 wrapped = FALSE;
4112
4113 if(flags & KMA_LOMEM)
4114 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx;
4115 else
4116 idx_last_contig_page_found = vm_page_find_contiguous_last_idx;
4117
4118 orig_last_idx = idx_last_contig_page_found;
4119 last_idx = orig_last_idx;
4120
4121 for (page_idx = last_idx, start_idx = last_idx;
4122 npages < contig_pages && page_idx < vm_pages_count;
4123 page_idx++) {
4124 retry:
4125 if (wrapped &&
4126 npages == 0 &&
4127 page_idx >= orig_last_idx) {
4128 /*
4129 * We're back where we started and we haven't
4130 * found any suitable contiguous range. Let's
4131 * give up.
4132 */
4133 break;
4134 }
4135 scanned++;
4136 m = &vm_pages[page_idx];
4137
4138 assert(!m->fictitious);
4139 assert(!m->private);
4140
4141 if (max_pnum && m->phys_page > max_pnum) {
4142 /* no more low pages... */
4143 break;
4144 }
4145 if (npages == 0 && ((m->phys_page & pnum_mask) != 0)) {
4146 /*
4147 * not aligned
4148 */
4149 RESET_STATE_OF_RUN();
4150
4151 } else if (VM_PAGE_WIRED(m) || m->gobbled ||
4152 m->encrypted_cleaning ||
4153 m->pageout_queue || m->laundry || m->wanted ||
4154 m->cleaning || m->overwriting || m->pageout) {
4155 /*
4156 * page is in a transient state
4157 * or a state we don't want to deal
4158 * with, so don't consider it which
4159 * means starting a new run
4160 */
4161 RESET_STATE_OF_RUN();
4162
4163 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) {
4164 /*
4165 * page needs to be on one of our queues
4166 * or it needs to belong to the compressor pool
4167 * in order for it to be stable behind the
4168 * locks we hold at this point...
4169 * if not, don't consider it which
4170 * means starting a new run
4171 */
4172 RESET_STATE_OF_RUN();
4173
4174 } else if (!m->free && (!m->tabled || m->busy)) {
4175 /*
4176 * pages on the free list are always 'busy'
4177 * so we couldn't test for 'busy' in the check
4178 * for the transient states... pages that are
4179 * 'free' are never 'tabled', so we also couldn't
4180 * test for 'tabled'. So we check here to make
4181 * sure that a non-free page is not busy and is
4182 * tabled on an object...
4183 * if not, don't consider it which
4184 * means starting a new run
4185 */
4186 RESET_STATE_OF_RUN();
4187
4188 } else {
4189 if (m->phys_page != prevcontaddr + 1) {
4190 if ((m->phys_page & pnum_mask) != 0) {
4191 RESET_STATE_OF_RUN();
4192 goto did_consider;
4193 } else {
4194 npages = 1;
4195 start_idx = page_idx;
4196 start_pnum = m->phys_page;
4197 }
4198 } else {
4199 npages++;
4200 }
4201 prevcontaddr = m->phys_page;
4202
4203 VM_PAGE_CHECK(m);
4204 if (m->free) {
4205 free_considered++;
4206 } else {
4207 /*
4208 * This page is not free.
4209 * If we can't steal used pages,
4210 * we have to give up this run
4211 * and keep looking.
4212 * Otherwise, we might need to
4213 * move the contents of this page
4214 * into a substitute page.
4215 */
4216 #if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4217 if (m->pmapped || m->dirty || m->precious) {
4218 substitute_needed++;
4219 }
4220 #else
4221 RESET_STATE_OF_RUN();
4222 #endif
4223 }
4224
4225 if ((free_considered + substitute_needed) > free_available) {
4226 /*
4227 * if we let this run continue
4228 * we will end up dropping the vm_page_free_count
4229 * below the reserve limit... we need to abort
4230 * this run, but we can at least re-consider this
4231 * page... thus the jump back to 'retry'
4232 */
4233 RESET_STATE_OF_RUN();
4234
4235 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) {
4236 considered++;
4237 goto retry;
4238 }
4239 /*
4240 * free_available == 0
4241 * so can't consider any free pages... if
4242 * we went to retry in this case, we'd
4243 * get stuck looking at the same page
4244 * w/o making any forward progress
4245 * we also want to take this path if we've already
4246 * reached our limit that controls the lock latency
4247 */
4248 }
4249 }
4250 did_consider:
4251 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) {
4252
4253 PAGE_REPLACEMENT_ALLOWED(FALSE);
4254
4255 lck_mtx_unlock(&vm_page_queue_free_lock);
4256 vm_page_unlock_queues();
4257
4258 mutex_pause(0);
4259
4260 PAGE_REPLACEMENT_ALLOWED(TRUE);
4261
4262 vm_page_lock_queues();
4263 lck_mtx_lock(&vm_page_queue_free_lock);
4264
4265 RESET_STATE_OF_RUN();
4266 /*
4267 * reset our free page limit since we
4268 * dropped the lock protecting the vm_page_free_queue
4269 */
4270 free_available = vm_page_free_count - vm_page_free_reserved;
4271 considered = 0;
4272 #if MACH_ASSERT
4273 yielded++;
4274 #endif
4275 goto retry;
4276 }
4277 considered++;
4278 }
4279 m = VM_PAGE_NULL;
4280
4281 if (npages != contig_pages) {
4282 if (!wrapped) {
4283 /*
4284 * We didn't find a contiguous range but we didn't
4285 * start from the very first page.
4286 * Start again from the very first page.
4287 */
4288 RESET_STATE_OF_RUN();
4289 if( flags & KMA_LOMEM)
4290 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0;
4291 else
4292 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0;
4293 last_idx = 0;
4294 page_idx = last_idx;
4295 wrapped = TRUE;
4296 goto retry;
4297 }
4298 lck_mtx_unlock(&vm_page_queue_free_lock);
4299 } else {
4300 vm_page_t m1;
4301 vm_page_t m2;
4302 unsigned int cur_idx;
4303 unsigned int tmp_start_idx;
4304 vm_object_t locked_object = VM_OBJECT_NULL;
4305 boolean_t abort_run = FALSE;
4306
4307 assert(page_idx - start_idx == contig_pages);
4308
4309 tmp_start_idx = start_idx;
4310
4311 /*
4312 * first pass through to pull the free pages
4313 * off of the free queue so that in case we
4314 * need substitute pages, we won't grab any
4315 * of the free pages in the run... we'll clear
4316 * the 'free' bit in the 2nd pass, and even in
4317 * an abort_run case, we'll collect all of the
4318 * free pages in this run and return them to the free list
4319 */
4320 while (start_idx < page_idx) {
4321
4322 m1 = &vm_pages[start_idx++];
4323
4324 #if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL
4325 assert(m1->free);
4326 #endif
4327
4328 if (m1->free) {
4329 unsigned int color;
4330
4331 color = m1->phys_page & vm_color_mask;
4332 #if MACH_ASSERT
4333 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE);
4334 #endif
4335 queue_remove(&vm_page_queue_free[color],
4336 m1,
4337 vm_page_t,
4338 pageq);
4339 m1->pageq.next = NULL;
4340 m1->pageq.prev = NULL;
4341 #if MACH_ASSERT
4342 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE);
4343 #endif
4344 /*
4345 * Clear the "free" bit so that this page
4346 * does not get considered for another
4347 * concurrent physically-contiguous allocation.
4348 */
4349 m1->free = FALSE;
4350 assert(m1->busy);
4351
4352 vm_page_free_count--;
4353 }
4354 }
4355 if( flags & KMA_LOMEM)
4356 vm_page_lomem_find_contiguous_last_idx = page_idx;
4357 else
4358 vm_page_find_contiguous_last_idx = page_idx;
4359
4360 /*
4361 * we can drop the free queue lock at this point since
4362 * we've pulled any 'free' candidates off of the list
4363 * we need it dropped so that we can do a vm_page_grab
4364 * when substituting for pmapped/dirty pages
4365 */
4366 lck_mtx_unlock(&vm_page_queue_free_lock);
4367
4368 start_idx = tmp_start_idx;
4369 cur_idx = page_idx - 1;
4370
4371 while (start_idx++ < page_idx) {
4372 /*
4373 * must go through the list from back to front
4374 * so that the page list is created in the
4375 * correct order - low -> high phys addresses
4376 */
4377 m1 = &vm_pages[cur_idx--];
4378
4379 assert(!m1->free);
4380
4381 if (m1->object == VM_OBJECT_NULL) {
4382 /*
4383 * page has already been removed from
4384 * the free list in the 1st pass
4385 */
4386 assert(m1->offset == (vm_object_offset_t) -1);
4387 assert(m1->busy);
4388 assert(!m1->wanted);
4389 assert(!m1->laundry);
4390 } else {
4391 vm_object_t object;
4392 int refmod;
4393 boolean_t disconnected, reusable;
4394
4395 if (abort_run == TRUE)
4396 continue;
4397
4398 object = m1->object;
4399
4400 if (object != locked_object) {
4401 if (locked_object) {
4402 vm_object_unlock(locked_object);
4403 locked_object = VM_OBJECT_NULL;
4404 }
4405 if (vm_object_lock_try(object))
4406 locked_object = object;
4407 }
4408 if (locked_object == VM_OBJECT_NULL ||
4409 (VM_PAGE_WIRED(m1) || m1->gobbled ||
4410 m1->encrypted_cleaning ||
4411 m1->pageout_queue || m1->laundry || m1->wanted ||
4412 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) {
4413
4414 if (locked_object) {
4415 vm_object_unlock(locked_object);
4416 locked_object = VM_OBJECT_NULL;
4417 }
4418 tmp_start_idx = cur_idx;
4419 abort_run = TRUE;
4420 continue;
4421 }
4422
4423 disconnected = FALSE;
4424 reusable = FALSE;
4425
4426 if ((m1->reusable ||
4427 m1->object->all_reusable) &&
4428 m1->inactive &&
4429 !m1->dirty &&
4430 !m1->reference) {
4431 /* reusable page... */
4432 refmod = pmap_disconnect(m1->phys_page);
4433 disconnected = TRUE;
4434 if (refmod == 0) {
4435 /*
4436 * ... not reused: can steal
4437 * without relocating contents.
4438 */
4439 reusable = TRUE;
4440 }
4441 }
4442
4443 if ((m1->pmapped &&
4444 ! reusable) ||
4445 m1->dirty ||
4446 m1->precious) {
4447 vm_object_offset_t offset;
4448
4449 m2 = vm_page_grab();
4450
4451 if (m2 == VM_PAGE_NULL) {
4452 if (locked_object) {
4453 vm_object_unlock(locked_object);
4454 locked_object = VM_OBJECT_NULL;
4455 }
4456 tmp_start_idx = cur_idx;
4457 abort_run = TRUE;
4458 continue;
4459 }
4460 if (! disconnected) {
4461 if (m1->pmapped)
4462 refmod = pmap_disconnect(m1->phys_page);
4463 else
4464 refmod = 0;
4465 }
4466
4467 /* copy the page's contents */
4468 pmap_copy_page(m1->phys_page, m2->phys_page);
4469 /* copy the page's state */
4470 assert(!VM_PAGE_WIRED(m1));
4471 assert(!m1->free);
4472 assert(!m1->pageout_queue);
4473 assert(!m1->laundry);
4474 m2->reference = m1->reference;
4475 assert(!m1->gobbled);
4476 assert(!m1->private);
4477 m2->no_cache = m1->no_cache;
4478 m2->xpmapped = 0;
4479 assert(!m1->busy);
4480 assert(!m1->wanted);
4481 assert(!m1->fictitious);
4482 m2->pmapped = m1->pmapped; /* should flush cache ? */
4483 m2->wpmapped = m1->wpmapped;
4484 assert(!m1->pageout);
4485 m2->absent = m1->absent;
4486 m2->error = m1->error;
4487 m2->dirty = m1->dirty;
4488 assert(!m1->cleaning);
4489 m2->precious = m1->precious;
4490 m2->clustered = m1->clustered;
4491 assert(!m1->overwriting);
4492 m2->restart = m1->restart;
4493 m2->unusual = m1->unusual;
4494 m2->encrypted = m1->encrypted;
4495 assert(!m1->encrypted_cleaning);
4496 m2->cs_validated = m1->cs_validated;
4497 m2->cs_tainted = m1->cs_tainted;
4498
4499 /*
4500 * If m1 had really been reusable,
4501 * we would have just stolen it, so
4502 * let's not propagate its "reusable"
4503 * bit and assert that m2 is not
4504 * marked as "reusable".
4505 */
4506 // m2->reusable = m1->reusable;
4507 assert(!m2->reusable);
4508
4509 assert(!m1->lopage);
4510 m2->slid = m1->slid;
4511 m2->compressor = m1->compressor;
4512
4513 /*
4514 * page may need to be flushed if
4515 * it is marshalled into a UPL
4516 * that is going to be used by a device
4517 * that doesn't support coherency
4518 */
4519 m2->written_by_kernel = TRUE;
4520
4521 /*
4522 * make sure we clear the ref/mod state
4523 * from the pmap layer... else we risk
4524 * inheriting state from the last time
4525 * this page was used...
4526 */
4527 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
4528
4529 if (refmod & VM_MEM_REFERENCED)
4530 m2->reference = TRUE;
4531 if (refmod & VM_MEM_MODIFIED) {
4532 SET_PAGE_DIRTY(m2, TRUE);
4533 }
4534 offset = m1->offset;
4535
4536 /*
4537 * completely cleans up the state
4538 * of the page so that it is ready
4539 * to be put onto the free list or,
4540 * for this purpose, to look like it
4541 * just came off of the free list
4542 */
4543 vm_page_free_prepare(m1);
4544
4545 /*
4546 * now put the substitute page
4547 * on the object
4548 */
4549 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE);
4550
4551 if (m2->compressor) {
4552 m2->pmapped = TRUE;
4553 m2->wpmapped = TRUE;
4554
4555 PMAP_ENTER(kernel_pmap, m2->offset, m2,
4556 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
4557 #if MACH_ASSERT
4558 compressed_pages++;
4559 #endif
4560 } else {
4561 if (m2->reference)
4562 vm_page_activate(m2);
4563 else
4564 vm_page_deactivate(m2);
4565 }
4566 PAGE_WAKEUP_DONE(m2);
4567
4568 } else {
4569 assert(!m1->compressor);
4570
4571 /*
4572 * completely cleans up the state
4573 * of the page so that it is ready
4574 * to be put onto the free list or,
4575 * for this purpose, to look like it
4576 * just came off of the free list
4577 */
4578 vm_page_free_prepare(m1);
4579 }
4580 #if MACH_ASSERT
4581 stolen_pages++;
4582 #endif
4583 }
4584 m1->pageq.next = (queue_entry_t) m;
4585 m1->pageq.prev = NULL;
4586 m = m1;
4587 }
4588 if (locked_object) {
4589 vm_object_unlock(locked_object);
4590 locked_object = VM_OBJECT_NULL;
4591 }
4592
4593 if (abort_run == TRUE) {
4594 if (m != VM_PAGE_NULL) {
4595 vm_page_free_list(m, FALSE);
4596 }
4597 #if MACH_ASSERT
4598 dumped_run++;
4599 #endif
4600 /*
4601 * want the index of the last
4602 * page in this run that was
4603 * successfully 'stolen', so back
4604 * it up 1 for the auto-decrement on use
4605 * and 1 more to bump back over this page
4606 */
4607 page_idx = tmp_start_idx + 2;
4608 if (page_idx >= vm_pages_count) {
4609 if (wrapped)
4610 goto done_scanning;
4611 page_idx = last_idx = 0;
4612 wrapped = TRUE;
4613 }
4614 abort_run = FALSE;
4615
4616 /*
4617 * the current run has been aborted... reset our
4618 * state and resume the scan just past the page
4619 * that caused the abort
4620 */
4621 RESET_STATE_OF_RUN();
4622
4623 if( flags & KMA_LOMEM)
4624 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx;
4625 else
4626 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx;
4627
4628 last_idx = page_idx;
4629
4630 lck_mtx_lock(&vm_page_queue_free_lock);
4631 /*
4632 * reset our free page limit since we
4633 * dropped the lock protecting the vm_page_free_queue
4634 */
4635 free_available = vm_page_free_count - vm_page_free_reserved;
4636 goto retry;
4637 }
4638
4639 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) {
4640
4641 if (wire == TRUE)
4642 m1->wire_count++;
4643 else
4644 m1->gobbled = TRUE;
4645 }
4646 if (wire == FALSE)
4647 vm_page_gobble_count += npages;
4648
4649 /*
4650 * gobbled pages are also counted as wired pages
4651 */
4652 vm_page_wire_count += npages;
4653
4654 assert(vm_page_verify_contiguous(m, npages));
4655 }
4656 done_scanning:
4657 PAGE_REPLACEMENT_ALLOWED(FALSE);
4658
4659 vm_page_unlock_queues();
4660
4661 #if DEBUG
4662 clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
4663
4664 tv_end_sec -= tv_start_sec;
4665 if (tv_end_usec < tv_start_usec) {
4666 tv_end_sec--;
4667 tv_end_usec += 1000000;
4668 }
4669 tv_end_usec -= tv_start_usec;
4670 if (tv_end_usec >= 1000000) {
4671 tv_end_sec++;
4672 tv_end_usec -= 1000000;
4673 }
4674 if (vm_page_find_contig_debug) {
4675 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... stole %d compressed pages\n",
4676 __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
4677 (long)tv_end_sec, tv_end_usec, orig_last_idx,
4678 scanned, yielded, dumped_run, stolen_pages, compressed_pages);
4679 }
4680
4681 #endif
4682 #if MACH_ASSERT
4683 vm_page_verify_free_lists();
4684 #endif
4685 return m;
4686 }
4687
4688 /*
4689 * Allocate a list of contiguous, wired pages.
4690 */
4691 kern_return_t
4692 cpm_allocate(
4693 vm_size_t size,
4694 vm_page_t *list,
4695 ppnum_t max_pnum,
4696 ppnum_t pnum_mask,
4697 boolean_t wire,
4698 int flags)
4699 {
4700 vm_page_t pages;
4701 unsigned int npages;
4702
4703 if (size % PAGE_SIZE != 0)
4704 return KERN_INVALID_ARGUMENT;
4705
4706 npages = (unsigned int) (size / PAGE_SIZE);
4707 if (npages != size / PAGE_SIZE) {
4708 /* 32-bit overflow */
4709 return KERN_INVALID_ARGUMENT;
4710 }
4711
4712 /*
4713 * Obtain a pointer to a subset of the free
4714 * list large enough to satisfy the request;
4715 * the region will be physically contiguous.
4716 */
4717 pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);
4718
4719 if (pages == VM_PAGE_NULL)
4720 return KERN_NO_SPACE;
4721 /*
4722 * determine need for wakeups
4723 */
4724 if ((vm_page_free_count < vm_page_free_min) ||
4725 ((vm_page_free_count < vm_page_free_target) &&
4726 ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
4727 thread_wakeup((event_t) &vm_page_free_wanted);
4728
4729 VM_CHECK_MEMORYSTATUS;
4730
4731 /*
4732 * The CPM pages should now be available and
4733 * ordered by ascending physical address.
4734 */
4735 assert(vm_page_verify_contiguous(pages, npages));
4736
4737 *list = pages;
4738 return KERN_SUCCESS;
4739 }
4740
4741
4742 unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;
4743
4744 /*
4745 * when working on a 'run' of pages, it is necessary to hold
4746 * the vm_page_queue_lock (a hot global lock) for certain operations
4747 * on the page... however, the majority of the work can be done
4748 * while merely holding the object lock... in fact there are certain
4749 * collections of pages that don't require any work brokered by the
4750 * vm_page_queue_lock... to mitigate the time spent behind the global
4751 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
4752 * while doing all of the work that doesn't require the vm_page_queue_lock...
4753 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
4754 * necessary work for each page... we will grab the busy bit on the page
4755 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
4756 * if it can't immediately take the vm_page_queue_lock in order to compete
4757 * for the locks in the same order that vm_pageout_scan takes them.
4758 * the operation names are modeled after the names of the routines that
4759 * need to be called in order to make the changes very obvious in the
4760 * original loop
4761 */
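/*
 * typical usage (sketch): a caller builds up an array of
 * struct vm_page_delayed_work entries while holding only the object
 * lock, setting dwp->dw_m and or'ing together the DW_* operations
 * needed for each page, then calls vm_page_do_delayed_work() to apply
 * them all under a single acquisition of the vm_page_queue_lock
 */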
4762
4763 void
4764 vm_page_do_delayed_work(
4765 vm_object_t object,
4766 struct vm_page_delayed_work *dwp,
4767 int dw_count)
4768 {
4769 int j;
4770 vm_page_t m;
4771 vm_page_t local_free_q = VM_PAGE_NULL;
4772
4773 /*
4774 * pageout_scan takes the vm_page_lock_queues first
4775 * then tries for the object lock... to avoid what
4776 * is effectively a lock inversion, we'll go to the
4777 * trouble of taking them in that same order... otherwise
4778 * if this object contains the majority of the pages resident
4779 * in the UBC (or a small set of large objects actively being
4780 * worked on contain the majority of the pages), we could
4781 * cause the pageout_scan thread to 'starve' in its attempt
4782 * to find pages to move to the free queue, since it has to
4783 * successfully acquire the object lock of any candidate page
4784 * before it can steal/clean it.
4785 */
4786 if (!vm_page_trylockspin_queues()) {
4787 vm_object_unlock(object);
4788
4789 vm_page_lockspin_queues();
4790
4791 for (j = 0; ; j++) {
4792 if (!vm_object_lock_avoid(object) &&
4793 _vm_object_lock_try(object))
4794 break;
4795 vm_page_unlock_queues();
4796 mutex_pause(j);
4797 vm_page_lockspin_queues();
4798 }
4799 }
4800 for (j = 0; j < dw_count; j++, dwp++) {
4801
4802 m = dwp->dw_m;
4803
4804 if (dwp->dw_mask & DW_vm_pageout_throttle_up)
4805 vm_pageout_throttle_up(m);
4806 #if CONFIG_PHANTOM_CACHE
4807 if (dwp->dw_mask & DW_vm_phantom_cache_update)
4808 vm_phantom_cache_update(m);
4809 #endif
4810 if (dwp->dw_mask & DW_vm_page_wire)
4811 vm_page_wire(m);
4812 else if (dwp->dw_mask & DW_vm_page_unwire) {
4813 boolean_t queueit;
4814
4815 queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;
4816
4817 vm_page_unwire(m, queueit);
4818 }
4819 if (dwp->dw_mask & DW_vm_page_free) {
4820 vm_page_free_prepare_queues(m);
4821
4822 assert(m->pageq.next == NULL && m->pageq.prev == NULL);
4823 /*
4824 * Add this page to our list of reclaimed pages,
4825 * to be freed later.
4826 */
4827 m->pageq.next = (queue_entry_t) local_free_q;
4828 local_free_q = m;
4829 } else {
4830 if (dwp->dw_mask & DW_vm_page_deactivate_internal)
4831 vm_page_deactivate_internal(m, FALSE);
4832 else if (dwp->dw_mask & DW_vm_page_activate) {
4833 if (m->active == FALSE) {
4834 vm_page_activate(m);
4835 }
4836 }
4837 else if (dwp->dw_mask & DW_vm_page_speculate)
4838 vm_page_speculate(m, TRUE);
4839 else if (dwp->dw_mask & DW_enqueue_cleaned) {
4840 /*
4841 * if we didn't hold the object lock and did this,
4842 * we might disconnect the page, then someone might
4843 * soft fault it back in, then we would put it on the
4844 * cleaned queue, and so we would have a referenced (maybe even dirty)
4845 * page on that queue, which we don't want
4846 */
4847 int refmod_state = pmap_disconnect(m->phys_page);
4848
4849 if ((refmod_state & VM_MEM_REFERENCED)) {
4850 /*
4851 * this page has been touched since it got cleaned; let's activate it
4852 * if it hasn't already been
4853 */
4854 vm_pageout_enqueued_cleaned++;
4855 vm_pageout_cleaned_reactivated++;
4856 vm_pageout_cleaned_commit_reactivated++;
4857
4858 if (m->active == FALSE)
4859 vm_page_activate(m);
4860 } else {
4861 m->reference = FALSE;
4862 vm_page_enqueue_cleaned(m);
4863 }
4864 }
4865 else if (dwp->dw_mask & DW_vm_page_lru)
4866 vm_page_lru(m);
4867 else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
4868 if ( !m->pageout_queue)
4869 VM_PAGE_QUEUES_REMOVE(m);
4870 }
4871 if (dwp->dw_mask & DW_set_reference)
4872 m->reference = TRUE;
4873 else if (dwp->dw_mask & DW_clear_reference)
4874 m->reference = FALSE;
4875
4876 if (dwp->dw_mask & DW_move_page) {
4877 if ( !m->pageout_queue) {
4878 VM_PAGE_QUEUES_REMOVE(m);
4879
4880 assert(m->object != kernel_object);
4881
4882 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
4883 }
4884 }
4885 if (dwp->dw_mask & DW_clear_busy)
4886 m->busy = FALSE;
4887
4888 if (dwp->dw_mask & DW_PAGE_WAKEUP)
4889 PAGE_WAKEUP(m);
4890 }
4891 }
4892 vm_page_unlock_queues();
4893
4894 if (local_free_q)
4895 vm_page_free_list(local_free_q, TRUE);
4896
4897 VM_CHECK_MEMORYSTATUS;
4898
4899 }
4900
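/*
 * Allocate 'page_count' pages from the low-memory pool (KMA_LOMEM is
 * required), chained through pageq.next.  On shortage, any pages
 * already grabbed are freed and KERN_RESOURCE_SHORTAGE is returned.
 */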
4901 kern_return_t
4902 vm_page_alloc_list(
4903 int page_count,
4904 int flags,
4905 vm_page_t *list)
4906 {
4907 vm_page_t lo_page_list = VM_PAGE_NULL;
4908 vm_page_t mem;
4909 int i;
4910
4911 if ( !(flags & KMA_LOMEM))
4912 panic("vm_page_alloc_list: called w/o KMA_LOMEM");
4913
4914 for (i = 0; i < page_count; i++) {
4915
4916 mem = vm_page_grablo();
4917
4918 if (mem == VM_PAGE_NULL) {
4919 if (lo_page_list)
4920 vm_page_free_list(lo_page_list, FALSE);
4921
4922 *list = VM_PAGE_NULL;
4923
4924 return (KERN_RESOURCE_SHORTAGE);
4925 }
4926 mem->pageq.next = (queue_entry_t) lo_page_list;
4927 lo_page_list = mem;
4928 }
4929 *list = lo_page_list;
4930
4931 return (KERN_SUCCESS);
4932 }
4933
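/*
 * Simple accessors for the offset, pageq linkage and physical page
 * number of a vm_page_t.
 */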
4934 void
4935 vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
4936 {
4937 page->offset = offset;
4938 }
4939
4940 vm_page_t
4941 vm_page_get_next(vm_page_t page)
4942 {
4943 return ((vm_page_t) page->pageq.next);
4944 }
4945
4946 vm_object_offset_t
4947 vm_page_get_offset(vm_page_t page)
4948 {
4949 return (page->offset);
4950 }
4951
4952 ppnum_t
4953 vm_page_get_phys_page(vm_page_t page)
4954 {
4955 return (page->phys_page);
4956 }
4957
4958
4959 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
4960
4961 #if HIBERNATION
4962
4963 static vm_page_t hibernate_gobble_queue;
4964
4965 extern boolean_t (* volatile consider_buffer_cache_collect)(int);
4966
4967 static int hibernate_drain_pageout_queue(struct vm_pageout_queue *);
4968 static int hibernate_flush_dirty_pages(int);
4969 static int hibernate_flush_queue(queue_head_t *, int);
4970
4971 void hibernate_flush_wait(void);
4972 void hibernate_mark_in_progress(void);
4973 void hibernate_clear_in_progress(void);
4974
4975 void hibernate_free_range(int, int);
4976 void hibernate_hash_insert_page(vm_page_t);
4977 uint32_t hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *, hibernate_page_list_t *);
4978 void hibernate_rebuild_vm_structs(void);
4979 uint32_t hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
4980 ppnum_t hibernate_lookup_paddr(unsigned int);
4981
4982 struct hibernate_statistics {
4983 int hibernate_considered;
4984 int hibernate_reentered_on_q;
4985 int hibernate_found_dirty;
4986 int hibernate_skipped_cleaning;
4987 int hibernate_skipped_transient;
4988 int hibernate_skipped_precious;
4989 int hibernate_skipped_external;
4990 int hibernate_queue_nolock;
4991 int hibernate_queue_paused;
4992 int hibernate_throttled;
4993 int hibernate_throttle_timeout;
4994 int hibernate_drained;
4995 int hibernate_drain_timeout;
4996 int cd_lock_failed;
4997 int cd_found_precious;
4998 int cd_found_wired;
4999 int cd_found_busy;
5000 int cd_found_unusual;
5001 int cd_found_cleaning;
5002 int cd_found_laundry;
5003 int cd_found_dirty;
5004 int cd_found_xpmapped;
5005 int cd_skipped_xpmapped;
5006 int cd_local_free;
5007 int cd_total_free;
5008 int cd_vm_page_wire_count;
5009 int cd_vm_struct_pages_unneeded;
5010 int cd_pages;
5011 int cd_discarded;
5012 int cd_count_wire;
5013 } hibernate_stats;
5014
5015
5016 /*
5017 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
5018 * so that we don't overrun the estimated image size, which would
5019 * result in a hibernation failure.
5020 */
5021 #define HIBERNATE_XPMAPPED_LIMIT 40000
5022
5023
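/*
 * Wait, in 5 second chunks, for the given pageout queue to drain.
 * Returns 1 if the queue times out while still non-empty (except for
 * the external queue, whose timeouts are tolerated), 0 otherwise.
 */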
5024 static int
5025 hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
5026 {
5027 wait_result_t wait_result;
5028
5029 vm_page_lock_queues();
5030
5031 while ( !queue_empty(&q->pgo_pending) ) {
5032
5033 q->pgo_draining = TRUE;
5034
5035 assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);
5036
5037 vm_page_unlock_queues();
5038
5039 wait_result = thread_block(THREAD_CONTINUE_NULL);
5040
5041 if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
5042 hibernate_stats.hibernate_drain_timeout++;
5043
5044 if (q == &vm_pageout_queue_external)
5045 return (0);
5046
5047 return (1);
5048 }
5049 vm_page_lock_queues();
5050
5051 hibernate_stats.hibernate_drained++;
5052 }
5053 vm_page_unlock_queues();
5054
5055 return (0);
5056 }
5057
5058
5059 boolean_t hibernate_skip_external = FALSE;
5060
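/*
 * Walk up to 'qcount' pages on queue 'q', pushing dirty pages to the
 * pageout/compressor path via vm_pageout_cluster() and moving pages
 * that can't (or shouldn't) be cleaned to the tail of the queue.
 * Returns 1 if the flush should be aborted, 0 otherwise.
 */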
5061 static int
5062 hibernate_flush_queue(queue_head_t *q, int qcount)
5063 {
5064 vm_page_t m;
5065 vm_object_t l_object = NULL;
5066 vm_object_t m_object = NULL;
5067 int refmod_state = 0;
5068 int try_failed_count = 0;
5069 int retval = 0;
5070 int current_run = 0;
5071 struct vm_pageout_queue *iq;
5072 struct vm_pageout_queue *eq;
5073 struct vm_pageout_queue *tq;
5074
5075
5076 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);
5077
5078 iq = &vm_pageout_queue_internal;
5079 eq = &vm_pageout_queue_external;
5080
5081 vm_page_lock_queues();
5082
5083 while (qcount && !queue_empty(q)) {
5084
5085 if (current_run++ == 1000) {
5086 if (hibernate_should_abort()) {
5087 retval = 1;
5088 break;
5089 }
5090 current_run = 0;
5091 }
5092
5093 m = (vm_page_t) queue_first(q);
5094 m_object = m->object;
5095
5096 /*
5097 * check to see if we currently are working
5098 * with the same object... if so, we've
5099 * already got the lock
5100 */
5101 if (m_object != l_object) {
5102 /*
5103 * the object associated with candidate page is
5104 * different from the one we were just working
5105 * with... dump the lock if we still own it
5106 */
5107 if (l_object != NULL) {
5108 vm_object_unlock(l_object);
5109 l_object = NULL;
5110 }
5111 /*
5112 * Try to lock object; since we've already got the
5113 * page queues lock, we can only 'try' for this one.
5114 * if the 'try' fails, we need to do a mutex_pause
5115 * to allow the owner of the object lock a chance to
5116 * run...
5117 */
5118 if ( !vm_object_lock_try_scan(m_object)) {
5119
5120 if (try_failed_count > 20) {
5121 hibernate_stats.hibernate_queue_nolock++;
5122
5123 goto reenter_pg_on_q;
5124 }
5125 vm_pageout_scan_wants_object = m_object;
5126
5127 vm_page_unlock_queues();
5128 mutex_pause(try_failed_count++);
5129 vm_page_lock_queues();
5130
5131 hibernate_stats.hibernate_queue_paused++;
5132 continue;
5133 } else {
5134 l_object = m_object;
5135 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5136 }
5137 }
5138 if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
5139 /*
5140 * page is not to be cleaned
5141 * put it back on the head of its queue
5142 */
5143 if (m->cleaning)
5144 hibernate_stats.hibernate_skipped_cleaning++;
5145 else
5146 hibernate_stats.hibernate_skipped_transient++;
5147
5148 goto reenter_pg_on_q;
5149 }
5150 if (m_object->copy == VM_OBJECT_NULL) {
5151 if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
5152 /*
5153 * let the normal hibernate image path
5154 * deal with these
5155 */
5156 goto reenter_pg_on_q;
5157 }
5158 }
5159 if ( !m->dirty && m->pmapped) {
5160 refmod_state = pmap_get_refmod(m->phys_page);
5161
5162 if ((refmod_state & VM_MEM_MODIFIED)) {
5163 SET_PAGE_DIRTY(m, FALSE);
5164 }
5165 } else
5166 refmod_state = 0;
5167
5168 if ( !m->dirty) {
5169 /*
5170 * page is not to be cleaned
5171 * put it back on the head of its queue
5172 */
5173 if (m->precious)
5174 hibernate_stats.hibernate_skipped_precious++;
5175
5176 goto reenter_pg_on_q;
5177 }
5178
5179 if (hibernate_skip_external == TRUE && !m_object->internal) {
5180
5181 hibernate_stats.hibernate_skipped_external++;
5182
5183 goto reenter_pg_on_q;
5184 }
5185 tq = NULL;
5186
5187 if (m_object->internal) {
5188 if (VM_PAGE_Q_THROTTLED(iq))
5189 tq = iq;
5190 } else if (VM_PAGE_Q_THROTTLED(eq))
5191 tq = eq;
5192
5193 if (tq != NULL) {
5194 wait_result_t wait_result;
5195 int wait_count = 5;
5196
5197 if (l_object != NULL) {
5198 vm_object_unlock(l_object);
5199 l_object = NULL;
5200 }
5201 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5202
5203 while (retval == 0) {
5204
5205 tq->pgo_throttled = TRUE;
5206
5207 assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);
5208
5209 vm_page_unlock_queues();
5210
5211 wait_result = thread_block(THREAD_CONTINUE_NULL);
5212
5213 vm_page_lock_queues();
5214
5215 if (wait_result != THREAD_TIMED_OUT)
5216 break;
5217 if (!VM_PAGE_Q_THROTTLED(tq))
5218 break;
5219
5220 if (hibernate_should_abort())
5221 retval = 1;
5222
5223 if (--wait_count == 0) {
5224
5225 hibernate_stats.hibernate_throttle_timeout++;
5226
5227 if (tq == eq) {
5228 hibernate_skip_external = TRUE;
5229 break;
5230 }
5231 retval = 1;
5232 }
5233 }
5234 if (retval)
5235 break;
5236
5237 hibernate_stats.hibernate_throttled++;
5238
5239 continue;
5240 }
5241 /*
5242 * we've already factored out pages in the laundry which
5243 * means this page can't be on the pageout queue so it's
5244 * safe to do the VM_PAGE_QUEUES_REMOVE
5245 */
5246 assert(!m->pageout_queue);
5247
5248 VM_PAGE_QUEUES_REMOVE(m);
5249
5250 if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
5251 pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
5252
5253 vm_pageout_cluster(m, FALSE);
5254
5255 hibernate_stats.hibernate_found_dirty++;
5256
5257 goto next_pg;
5258
5259 reenter_pg_on_q:
5260 queue_remove(q, m, vm_page_t, pageq);
5261 queue_enter(q, m, vm_page_t, pageq);
5262
5263 hibernate_stats.hibernate_reentered_on_q++;
5264 next_pg:
5265 hibernate_stats.hibernate_considered++;
5266
5267 qcount--;
5268 try_failed_count = 0;
5269 }
5270 if (l_object != NULL) {
5271 vm_object_unlock(l_object);
5272 l_object = NULL;
5273 }
5274 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
5275
5276 vm_page_unlock_queues();
5277
5278 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);
5279
5280 return (retval);
5281 }
5282
5283
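/*
 * Flush the speculative, inactive, anonymous, cleaned and active
 * queues and drain the pageout queues so that as few dirty pages as
 * possible remain when the hibernation image is written.
 * Returns non-zero if the flush was aborted.
 */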
5284 static int
5285 hibernate_flush_dirty_pages(int pass)
5286 {
5287 struct vm_speculative_age_q *aq;
5288 uint32_t i;
5289
5290 if (vm_page_local_q) {
5291 for (i = 0; i < vm_page_local_q_count; i++)
5292 vm_page_reactivate_local(i, TRUE, FALSE);
5293 }
5294
5295 for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
5296 int qcount;
5297 vm_page_t m;
5298
5299 aq = &vm_page_queue_speculative[i];
5300
5301 if (queue_empty(&aq->age_q))
5302 continue;
5303 qcount = 0;
5304
5305 vm_page_lockspin_queues();
5306
5307 queue_iterate(&aq->age_q,
5308 m,
5309 vm_page_t,
5310 pageq)
5311 {
5312 qcount++;
5313 }
5314 vm_page_unlock_queues();
5315
5316 if (qcount) {
5317 if (hibernate_flush_queue(&aq->age_q, qcount))
5318 return (1);
5319 }
5320 }
5321 if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
5322 return (1);
5323 if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
5324 return (1);
5325 if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
5326 return (1);
5327 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
5328 return (1);
5329
5330 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5331 vm_compressor_record_warmup_start();
5332
5333 if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
5334 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5335 vm_compressor_record_warmup_end();
5336 return (1);
5337 }
5338 if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
5339 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5340 vm_compressor_record_warmup_end();
5341 return (1);
5342 }
5343 if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
5344 vm_compressor_record_warmup_end();
5345
5346 if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
5347 return (1);
5348
5349 return (0);
5350 }
5351
5352
5353 void
5354 hibernate_reset_stats()
5355 {
5356 bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
5357 }
5358
5359
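/*
 * Top-level flush run while preparing the hibernation image: flush
 * dirty pages, flush the compressor (when active) and let the buffer
 * cache and zone garbage collector give back wired pages.
 * Returns non-zero if hibernate_flush_dirty_pages() failed.
 */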
5360 int
5361 hibernate_flush_memory()
5362 {
5363 int retval;
5364
5365 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);
5366
5367 hibernate_cleaning_in_progress = TRUE;
5368 hibernate_skip_external = FALSE;
5369
5370 if ((retval = hibernate_flush_dirty_pages(1)) == 0) {
5371
5372 if (COMPRESSED_PAGER_IS_ACTIVE) {
5373
5374 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5375
5376 vm_compressor_flush();
5377
5378 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
5379 }
5380 if (consider_buffer_cache_collect != NULL) {
5381 unsigned int orig_wire_count;
5382
5383 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5384 orig_wire_count = vm_page_wire_count;
5385
5386 (void)(*consider_buffer_cache_collect)(1);
5387 consider_zone_gc(TRUE);
5388
5389 HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);
5390
5391 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
5392 }
5393 }
5394 hibernate_cleaning_in_progress = FALSE;
5395
5396 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);
5397
5398 if (retval && COMPRESSED_PAGER_IS_ACTIVE)
5399 HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);
5400
5401
5402 HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
5403 hibernate_stats.hibernate_considered,
5404 hibernate_stats.hibernate_reentered_on_q,
5405 hibernate_stats.hibernate_found_dirty);
5406 HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
5407 hibernate_stats.hibernate_skipped_cleaning,
5408 hibernate_stats.hibernate_skipped_transient,
5409 hibernate_stats.hibernate_skipped_precious,
5410 hibernate_stats.hibernate_skipped_external,
5411 hibernate_stats.hibernate_queue_nolock);
5412 HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
5413 hibernate_stats.hibernate_queue_paused,
5414 hibernate_stats.hibernate_throttled,
5415 hibernate_stats.hibernate_throttle_timeout,
5416 hibernate_stats.hibernate_drained,
5417 hibernate_stats.hibernate_drain_timeout);
5418
5419 return (retval);
5420 }
5421
5422
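/*
 * Zero every bank bitmap in the list (0 => page needs to be saved)
 * and set the out-of-range bits at the tail of each bank.
 */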
5423 static void
5424 hibernate_page_list_zero(hibernate_page_list_t *list)
5425 {
5426 uint32_t bank;
5427 hibernate_bitmap_t * bitmap;
5428
5429 bitmap = &list->bank_bitmap[0];
5430 for (bank = 0; bank < list->bank_count; bank++)
5431 {
5432 uint32_t last_bit;
5433
5434 bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
5435 // set out-of-bound bits at end of bitmap.
5436 last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
5437 if (last_bit)
5438 bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);
5439
5440 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
5441 }
5442 }
5443
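/*
 * Grab up to 'gobble_count' pages, waiting up to 'free_page_time' ms
 * when the free list runs dry, and park them on hibernate_gobble_queue;
 * hibernate_free_gobble_pages() gives them back.
 */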
5444 void
5445 hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
5446 {
5447 uint32_t i;
5448 vm_page_t m;
5449 uint64_t start, end, timeout, nsec;
5450 clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
5451 clock_get_uptime(&start);
5452
5453 for (i = 0; i < gobble_count; i++)
5454 {
5455 while (VM_PAGE_NULL == (m = vm_page_grab()))
5456 {
5457 clock_get_uptime(&end);
5458 if (end >= timeout)
5459 break;
5460 VM_PAGE_WAIT();
5461 }
5462 if (!m)
5463 break;
5464 m->busy = FALSE;
5465 vm_page_gobble(m);
5466
5467 m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
5468 hibernate_gobble_queue = m;
5469 }
5470
5471 clock_get_uptime(&end);
5472 absolutetime_to_nanoseconds(end - start, &nsec);
5473 HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
5474 }
5475
5476 void
5477 hibernate_free_gobble_pages(void)
5478 {
5479 vm_page_t m, next;
5480 uint32_t count = 0;
5481
5482 m = (vm_page_t) hibernate_gobble_queue;
5483 while(m)
5484 {
5485 next = (vm_page_t) m->pageq.next;
5486 vm_page_free(m);
5487 count++;
5488 m = next;
5489 }
5490 hibernate_gobble_queue = VM_PAGE_NULL;
5491
5492 if (count)
5493 HIBLOG("Freed %d pages\n", count);
5494 }
5495
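/*
 * Decide whether page 'm' can be discarded on wakeup rather than saved
 * in the image: wired, precious, busy, unusual, cleaning and laundry
 * pages are always kept; clean pages and pages of volatile or empty
 * purgeable objects may be dropped, except that up to
 * HIBERNATE_XPMAPPED_LIMIT referenced, executable ('xpmapped') pages
 * of external objects are kept in the image.
 */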
5496 static boolean_t
5497 hibernate_consider_discard(vm_page_t m, boolean_t preflight)
5498 {
5499 vm_object_t object = NULL;
5500 int refmod_state;
5501 boolean_t discard = FALSE;
5502
5503 do
5504 {
5505 if (m->private)
5506 panic("hibernate_consider_discard: private");
5507
5508 if (!vm_object_lock_try(m->object)) {
5509 if (!preflight) hibernate_stats.cd_lock_failed++;
5510 break;
5511 }
5512 object = m->object;
5513
5514 if (VM_PAGE_WIRED(m)) {
5515 if (!preflight) hibernate_stats.cd_found_wired++;
5516 break;
5517 }
5518 if (m->precious) {
5519 if (!preflight) hibernate_stats.cd_found_precious++;
5520 break;
5521 }
5522 if (m->busy || !object->alive) {
5523 /*
5524 * Somebody is playing with this page.
5525 */
5526 if (!preflight) hibernate_stats.cd_found_busy++;
5527 break;
5528 }
5529 if (m->absent || m->unusual || m->error) {
5530 /*
5531 * If it's unusual in any way, ignore it
5532 */
5533 if (!preflight) hibernate_stats.cd_found_unusual++;
5534 break;
5535 }
5536 if (m->cleaning) {
5537 if (!preflight) hibernate_stats.cd_found_cleaning++;
5538 break;
5539 }
5540 if (m->laundry) {
5541 if (!preflight) hibernate_stats.cd_found_laundry++;
5542 break;
5543 }
5544 if (!m->dirty)
5545 {
5546 refmod_state = pmap_get_refmod(m->phys_page);
5547
5548 if (refmod_state & VM_MEM_REFERENCED)
5549 m->reference = TRUE;
5550 if (refmod_state & VM_MEM_MODIFIED) {
5551 SET_PAGE_DIRTY(m, FALSE);
5552 }
5553 }
5554
5555 /*
5556 * If it's clean or purgeable we can discard the page on wakeup.
5557 */
5558 discard = (!m->dirty)
5559 || (VM_PURGABLE_VOLATILE == object->purgable)
5560 || (VM_PURGABLE_EMPTY == object->purgable);
5561
5562
5563 if (discard == FALSE) {
5564 if (!preflight)
5565 hibernate_stats.cd_found_dirty++;
5566 } else if (m->xpmapped && m->reference && !object->internal) {
5567 if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
5568 if (!preflight)
5569 hibernate_stats.cd_found_xpmapped++;
5570 discard = FALSE;
5571 } else {
5572 if (!preflight)
5573 hibernate_stats.cd_skipped_xpmapped++;
5574 }
5575 }
5576 }
5577 while (FALSE);
5578
5579 if (object)
5580 vm_object_unlock(object);
5581
5582 return (discard);
5583 }
5584
5585
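/*
 * Free a page that hibernate_consider_discard() said could be dropped;
 * if it belongs to a volatile purgeable object, empty the object and
 * fix up the purgeable accounting first.
 */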
5586 static void
5587 hibernate_discard_page(vm_page_t m)
5588 {
5589 if (m->absent || m->unusual || m->error)
5590 /*
5591 * If it's unusual in any way, ignore it
5592 */
5593 return;
5594
5595 #if MACH_ASSERT || DEBUG
5596 vm_object_t object = m->object;
5597 if (!vm_object_lock_try(m->object))
5598 panic("hibernate_discard_page(%p) !vm_object_lock_try", m);
5599 #else
5600 /* No need to lock page queue for token delete, hibernate_vm_unlock()
5601 makes sure these locks are uncontended before sleep */
5602 #endif /* MACH_ASSERT || DEBUG */
5603
5604 if (m->pmapped == TRUE)
5605 {
5606 __unused int refmod_state = pmap_disconnect(m->phys_page);
5607 }
5608
5609 if (m->laundry)
5610 panic("hibernate_discard_page(%p) laundry", m);
5611 if (m->private)
5612 panic("hibernate_discard_page(%p) private", m);
5613 if (m->fictitious)
5614 panic("hibernate_discard_page(%p) fictitious", m);
5615
5616 if (VM_PURGABLE_VOLATILE == m->object->purgable)
5617 {
5618 /* object should be on a queue */
5619 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL));
5620 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object);
5621 assert(old_queue);
5622 if (m->object->purgeable_when_ripe) {
5623 vm_purgeable_token_delete_first(old_queue);
5624 }
5625 m->object->purgable = VM_PURGABLE_EMPTY;
5626
5627 /*
5628 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are
5629 * accounted in the "volatile" ledger, so no change here.
5630 * We have to update vm_page_purgeable_count, though, since we're
5631 * effectively purging this object.
5632 */
5633 unsigned int delta;
5634 assert(m->object->resident_page_count >= m->object->wired_page_count);
5635 delta = (m->object->resident_page_count - m->object->wired_page_count);
5636 assert(vm_page_purgeable_count >= delta);
5637 assert(delta > 0);
5638 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count);
5639 }
5640
5641 vm_page_free(m);
5642
5643 #if MACH_ASSERT || DEBUG
5644 vm_object_unlock(object);
5645 #endif /* MACH_ASSERT || DEBUG */
5646 }
5647
5648 /*
5649 Grab locks for hibernate_page_list_setall()
5650 */
5651 void
5652 hibernate_vm_lock_queues(void)
5653 {
5654 vm_object_lock(compressor_object);
5655 vm_page_lock_queues();
5656 lck_mtx_lock(&vm_page_queue_free_lock);
5657
5658 if (vm_page_local_q) {
5659 uint32_t i;
5660 for (i = 0; i < vm_page_local_q_count; i++) {
5661 struct vpl *lq;
5662 lq = &vm_page_local_q[i].vpl_un.vpl;
5663 VPL_LOCK(&lq->vpl_lock);
5664 }
5665 }
5666 }
5667
5668 void
5669 hibernate_vm_unlock_queues(void)
5670 {
5671 if (vm_page_local_q) {
5672 uint32_t i;
5673 for (i = 0; i < vm_page_local_q_count; i++) {
5674 struct vpl *lq;
5675 lq = &vm_page_local_q[i].vpl_un.vpl;
5676 VPL_UNLOCK(&lq->vpl_lock);
5677 }
5678 }
5679 lck_mtx_unlock(&vm_page_queue_free_lock);
5680 vm_page_unlock_queues();
5681 vm_object_unlock(compressor_object);
5682 }
5683
5684 /*
5685 * Bits zero in the bitmaps => page needs to be saved. All pages default to be saved,
5686 * pages known to VM to not need saving are subtracted.
5687 * Wired pages to be saved are present in page_list_wired, pageable in page_list.
5688 */
5689
5690 void
5691 hibernate_page_list_setall(hibernate_page_list_t * page_list,
5692 hibernate_page_list_t * page_list_wired,
5693 hibernate_page_list_t * page_list_pal,
5694 boolean_t preflight,
5695 boolean_t will_discard,
5696 uint32_t * pagesOut)
5697 {
5698 uint64_t start, end, nsec;
5699 vm_page_t m;
5700 vm_page_t next;
5701 uint32_t pages = page_list->page_count;
5702 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0;
5703 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0;
5704 uint32_t count_wire = pages;
5705 uint32_t count_discard_active = 0;
5706 uint32_t count_discard_inactive = 0;
5707 uint32_t count_discard_cleaned = 0;
5708 uint32_t count_discard_purgeable = 0;
5709 uint32_t count_discard_speculative = 0;
5710 uint32_t count_discard_vm_struct_pages = 0;
5711 uint32_t i;
5712 uint32_t bank;
5713 hibernate_bitmap_t * bitmap;
5714 hibernate_bitmap_t * bitmap_wired;
5715 boolean_t discard_all;
5716 boolean_t discard;
5717
5718 HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired);
5719
5720 if (preflight) {
5721 page_list = NULL;
5722 page_list_wired = NULL;
5723 page_list_pal = NULL;
5724 discard_all = FALSE;
5725 } else {
5726 discard_all = will_discard;
5727 }
5728
5729 #if MACH_ASSERT || DEBUG
5730 if (!preflight)
5731 {
5732 vm_page_lock_queues();
5733 if (vm_page_local_q) {
5734 for (i = 0; i < vm_page_local_q_count; i++) {
5735 struct vpl *lq;
5736 lq = &vm_page_local_q[i].vpl_un.vpl;
5737 VPL_LOCK(&lq->vpl_lock);
5738 }
5739 }
5740 }
5741 #endif /* MACH_ASSERT || DEBUG */
5742
5743
5744 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0);
5745
5746 clock_get_uptime(&start);
5747
5748 if (!preflight) {
5749 hibernate_page_list_zero(page_list);
5750 hibernate_page_list_zero(page_list_wired);
5751 hibernate_page_list_zero(page_list_pal);
5752
5753 hibernate_stats.cd_vm_page_wire_count = vm_page_wire_count;
5754 hibernate_stats.cd_pages = pages;
5755 }
5756
5757 if (vm_page_local_q) {
5758 for (i = 0; i < vm_page_local_q_count; i++)
5759 vm_page_reactivate_local(i, TRUE, !preflight);
5760 }
5761
5762 if (preflight) {
5763 vm_object_lock(compressor_object);
5764 vm_page_lock_queues();
5765 lck_mtx_lock(&vm_page_queue_free_lock);
5766 }
5767
5768 m = (vm_page_t) hibernate_gobble_queue;
5769 while (m)
5770 {
5771 pages--;
5772 count_wire--;
5773 if (!preflight) {
5774 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5775 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5776 }
5777 m = (vm_page_t) m->pageq.next;
5778 }
5779
5780 if (!preflight) for( i = 0; i < real_ncpus; i++ )
5781 {
5782 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor)
5783 {
5784 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next)
5785 {
5786 pages--;
5787 count_wire--;
5788 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5789 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5790
5791 hibernate_stats.cd_local_free++;
5792 hibernate_stats.cd_total_free++;
5793 }
5794 }
5795 }
5796
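/*
 * The global free queues (one per color) and the lopage free queue below are
 * also excluded from the image and counted as free.
 */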
5797 for( i = 0; i < vm_colors; i++ )
5798 {
5799 queue_iterate(&vm_page_queue_free[i],
5800 m,
5801 vm_page_t,
5802 pageq)
5803 {
5804 pages--;
5805 count_wire--;
5806 if (!preflight) {
5807 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5808 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5809
5810 hibernate_stats.cd_total_free++;
5811 }
5812 }
5813 }
5814
5815 queue_iterate(&vm_lopage_queue_free,
5816 m,
5817 vm_page_t,
5818 pageq)
5819 {
5820 pages--;
5821 count_wire--;
5822 if (!preflight) {
5823 hibernate_page_bitset(page_list, TRUE, m->phys_page);
5824 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5825
5826 hibernate_stats.cd_total_free++;
5827 }
5828 }
5829
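/*
 * For each pageable queue (throttled, anonymous, cleaned, active, inactive,
 * speculative), if the corresponding kIOHibernateModeDiscardClean* bit is set
 * and hibernate_consider_discard() agrees, mark the page as not needing to be
 * saved; on a non-preflight pass that will discard, the page is discarded
 * immediately via hibernate_discard_page().
 */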
5830 m = (vm_page_t) queue_first(&vm_page_queue_throttled);
5831 while (m && !queue_end(&vm_page_queue_throttled, (queue_entry_t)m))
5832 {
5833 next = (vm_page_t) m->pageq.next;
5834 discard = FALSE;
5835 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5836 && hibernate_consider_discard(m, preflight))
5837 {
5838 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5839 count_discard_inactive++;
5840 discard = discard_all;
5841 }
5842 else
5843 count_throttled++;
5844 count_wire--;
5845 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5846
5847 if (discard) hibernate_discard_page(m);
5848 m = next;
5849 }
5850
5851 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
5852 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
5853 {
5854 next = (vm_page_t) m->pageq.next;
5855 discard = FALSE;
5856 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5857 && hibernate_consider_discard(m, preflight))
5858 {
5859 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5860 if (m->dirty)
5861 count_discard_purgeable++;
5862 else
5863 count_discard_inactive++;
5864 discard = discard_all;
5865 }
5866 else
5867 count_anonymous++;
5868 count_wire--;
5869 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5870 if (discard) hibernate_discard_page(m);
5871 m = next;
5872 }
5873
5874 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
5875 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
5876 {
5877 next = (vm_page_t) m->pageq.next;
5878 discard = FALSE;
5879 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5880 && hibernate_consider_discard(m, preflight))
5881 {
5882 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5883 if (m->dirty)
5884 count_discard_purgeable++;
5885 else
5886 count_discard_cleaned++;
5887 discard = discard_all;
5888 }
5889 else
5890 count_cleaned++;
5891 count_wire--;
5892 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5893 if (discard) hibernate_discard_page(m);
5894 m = next;
5895 }
5896
5897 m = (vm_page_t) queue_first(&vm_page_queue_active);
5898 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
5899 {
5900 next = (vm_page_t) m->pageq.next;
5901 discard = FALSE;
5902 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode)
5903 && hibernate_consider_discard(m, preflight))
5904 {
5905 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5906 if (m->dirty)
5907 count_discard_purgeable++;
5908 else
5909 count_discard_active++;
5910 discard = discard_all;
5911 }
5912 else
5913 count_active++;
5914 count_wire--;
5915 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5916 if (discard) hibernate_discard_page(m);
5917 m = next;
5918 }
5919
5920 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
5921 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
5922 {
5923 next = (vm_page_t) m->pageq.next;
5924 discard = FALSE;
5925 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5926 && hibernate_consider_discard(m, preflight))
5927 {
5928 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5929 if (m->dirty)
5930 count_discard_purgeable++;
5931 else
5932 count_discard_inactive++;
5933 discard = discard_all;
5934 }
5935 else
5936 count_inactive++;
5937 count_wire--;
5938 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5939 if (discard) hibernate_discard_page(m);
5940 m = next;
5941 }
5942
5943 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
5944 {
5945 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
5946 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
5947 {
5948 next = (vm_page_t) m->pageq.next;
5949 discard = FALSE;
5950 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode)
5951 && hibernate_consider_discard(m, preflight))
5952 {
5953 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page);
5954 count_discard_speculative++;
5955 discard = discard_all;
5956 }
5957 else
5958 count_speculative++;
5959 count_wire--;
5960 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5961 if (discard) hibernate_discard_page(m);
5962 m = next;
5963 }
5964 }
5965
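/*
 * Pages backing the compressor object are counted separately and removed from
 * the wire count; they are marked only in the wired bitmap, so their contents
 * still get saved.
 */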
5966 queue_iterate(&compressor_object->memq, m, vm_page_t, listq)
5967 {
5968 count_compressor++;
5969 count_wire--;
5970 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page);
5971 }
5972
5973 if (preflight == FALSE && discard_all == TRUE) {
5974 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0);
5975
5976 HIBLOG("hibernate_teardown started\n");
5977 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired);
5978 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages);
5979
5980 pages -= count_discard_vm_struct_pages;
5981 count_wire -= count_discard_vm_struct_pages;
5982
5983 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages;
5984
5985 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
5986 }
5987
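/*
 * Merge the two bitmaps: any page that will be saved via the wired list
 * (bit clear in page_list_wired) gets its bit set in page_list so that it is
 * not also saved as pageable data.
 */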
5988 if (!preflight) {
5989 // pull wired from hibernate_bitmap
5990 bitmap = &page_list->bank_bitmap[0];
5991 bitmap_wired = &page_list_wired->bank_bitmap[0];
5992 for (bank = 0; bank < page_list->bank_count; bank++)
5993 {
5994 for (i = 0; i < bitmap->bitmapwords; i++)
5995 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i];
5996 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords];
5997 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords];
5998 }
5999 }
6000
6001 // machine dependent adjustments
6002 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages);
6003
6004 if (!preflight) {
6005 hibernate_stats.cd_count_wire = count_wire;
6006 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable +
6007 count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages;
6008 }
6009
6010 clock_get_uptime(&end);
6011 absolutetime_to_nanoseconds(end - start, &nsec);
6012 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL);
6013
6014 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d, spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n",
6015 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped,
6016 discard_all ? "did" : "could",
6017 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6018
6019 if (hibernate_stats.cd_skipped_xpmapped)
6020 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped);
6021
6022 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned;
6023
6024 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active;
6025
6026 #if MACH_ASSERT || DEBUG
6027 if (!preflight)
6028 {
6029 if (vm_page_local_q) {
6030 for (i = 0; i < vm_page_local_q_count; i++) {
6031 struct vpl *lq;
6032 lq = &vm_page_local_q[i].vpl_un.vpl;
6033 VPL_UNLOCK(&lq->vpl_lock);
6034 }
6035 }
6036 vm_page_unlock_queues();
6037 }
6038 #endif /* MACH_ASSERT || DEBUG */
6039
6040 if (preflight) {
6041 lck_mtx_unlock(&vm_page_queue_free_lock);
6042 vm_page_unlock_queues();
6043 vm_object_unlock(compressor_object);
6044 }
6045
6046 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0);
6047 }
6048
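/*
 * Walk the pageable queues and discard every page whose bit is set in
 * page_list, i.e. the pages previously identified as discardable by
 * hibernate_page_list_setall().
 */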
6049 void
6050 hibernate_page_list_discard(hibernate_page_list_t * page_list)
6051 {
6052 uint64_t start, end, nsec;
6053 vm_page_t m;
6054 vm_page_t next;
6055 uint32_t i;
6056 uint32_t count_discard_active = 0;
6057 uint32_t count_discard_inactive = 0;
6058 uint32_t count_discard_purgeable = 0;
6059 uint32_t count_discard_cleaned = 0;
6060 uint32_t count_discard_speculative = 0;
6061
6062
6063 #if MACH_ASSERT || DEBUG
6064 vm_page_lock_queues();
6065 if (vm_page_local_q) {
6066 for (i = 0; i < vm_page_local_q_count; i++) {
6067 struct vpl *lq;
6068 lq = &vm_page_local_q[i].vpl_un.vpl;
6069 VPL_LOCK(&lq->vpl_lock);
6070 }
6071 }
6072 #endif /* MACH_ASSERT || DEBUG */
6073
6074 clock_get_uptime(&start);
6075
6076 m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
6077 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m))
6078 {
6079 next = (vm_page_t) m->pageq.next;
6080 if (hibernate_page_bittst(page_list, m->phys_page))
6081 {
6082 if (m->dirty)
6083 count_discard_purgeable++;
6084 else
6085 count_discard_inactive++;
6086 hibernate_discard_page(m);
6087 }
6088 m = next;
6089 }
6090
6091 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ )
6092 {
6093 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q);
6094 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m))
6095 {
6096 next = (vm_page_t) m->pageq.next;
6097 if (hibernate_page_bittst(page_list, m->phys_page))
6098 {
6099 count_discard_speculative++;
6100 hibernate_discard_page(m);
6101 }
6102 m = next;
6103 }
6104 }
6105
6106 m = (vm_page_t) queue_first(&vm_page_queue_inactive);
6107 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m))
6108 {
6109 next = (vm_page_t) m->pageq.next;
6110 if (hibernate_page_bittst(page_list, m->phys_page))
6111 {
6112 if (m->dirty)
6113 count_discard_purgeable++;
6114 else
6115 count_discard_inactive++;
6116 hibernate_discard_page(m);
6117 }
6118 m = next;
6119 }
6120
6121 m = (vm_page_t) queue_first(&vm_page_queue_active);
6122 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m))
6123 {
6124 next = (vm_page_t) m->pageq.next;
6125 if (hibernate_page_bittst(page_list, m->phys_page))
6126 {
6127 if (m->dirty)
6128 count_discard_purgeable++;
6129 else
6130 count_discard_active++;
6131 hibernate_discard_page(m);
6132 }
6133 m = next;
6134 }
6135
6136 m = (vm_page_t) queue_first(&vm_page_queue_cleaned);
6137 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m))
6138 {
6139 next = (vm_page_t) m->pageq.next;
6140 if (hibernate_page_bittst(page_list, m->phys_page))
6141 {
6142 if (m->dirty)
6143 count_discard_purgeable++;
6144 else
6145 count_discard_cleaned++;
6146 hibernate_discard_page(m);
6147 }
6148 m = next;
6149 }
6150
6151 #if MACH_ASSERT || DEBUG
6152 if (vm_page_local_q) {
6153 for (i = 0; i < vm_page_local_q_count; i++) {
6154 struct vpl *lq;
6155 lq = &vm_page_local_q[i].vpl_un.vpl;
6156 VPL_UNLOCK(&lq->vpl_lock);
6157 }
6158 }
6159 vm_page_unlock_queues();
6160 #endif /* MACH_ASSERT || DEBUG */
6161
6162 clock_get_uptime(&end);
6163 absolutetime_to_nanoseconds(end - start, &nsec);
6164 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n",
6165 nsec / 1000000ULL,
6166 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned);
6167 }
6168
6169 boolean_t hibernate_paddr_map_inited = FALSE;
6170 boolean_t hibernate_rebuild_needed = FALSE;
6171 unsigned int hibernate_teardown_last_valid_compact_indx = -1;
6172 vm_page_t hibernate_rebuild_hash_list = NULL;
6173
6174 unsigned int hibernate_teardown_found_tabled_pages = 0;
6175 unsigned int hibernate_teardown_found_created_pages = 0;
6176 unsigned int hibernate_teardown_found_free_pages = 0;
6177 unsigned int hibernate_teardown_vm_page_free_count;
6178
6179
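/*
 * Each ppnum_mapping describes a physically contiguous run of vm_pages[]
 * entries: indexes [ppnm_sindx, ppnm_eindx) correspond to physical pages
 * ppnm_base_paddr + (index - ppnm_sindx).
 */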
6180 struct ppnum_mapping {
6181 struct ppnum_mapping *ppnm_next;
6182 ppnum_t ppnm_base_paddr;
6183 unsigned int ppnm_sindx;
6184 unsigned int ppnm_eindx;
6185 };
6186
6187 struct ppnum_mapping *ppnm_head;
6188 struct ppnum_mapping *ppnm_last_found = NULL;
6189
6190
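/*
 * Build the ppnm list with a single scan of vm_pages[], starting a new
 * mapping whenever a page's physical page number is not contiguous with the
 * previous entry's.
 */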
6191 void
6192 hibernate_create_paddr_map()
6193 {
6194 unsigned int i;
6195 ppnum_t next_ppnum_in_run = 0;
6196 struct ppnum_mapping *ppnm = NULL;
6197
6198 if (hibernate_paddr_map_inited == FALSE) {
6199
6200 for (i = 0; i < vm_pages_count; i++) {
6201
6202 if (ppnm)
6203 ppnm->ppnm_eindx = i;
6204
6205 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) {
6206
6207 ppnm = kalloc(sizeof(struct ppnum_mapping));
6208
6209 ppnm->ppnm_next = ppnm_head;
6210 ppnm_head = ppnm;
6211
6212 ppnm->ppnm_sindx = i;
6213 ppnm->ppnm_base_paddr = vm_pages[i].phys_page;
6214 }
6215 next_ppnum_in_run = vm_pages[i].phys_page + 1;
6216 }
6217 ppnm->ppnm_eindx++;
6218
6219 hibernate_paddr_map_inited = TRUE;
6220 }
6221 }
6222
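/*
 * Translate a vm_pages[] index back to its physical page number, checking the
 * most recently used mapping first and falling back to a linear search of the
 * ppnm list.
 */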
6223 ppnum_t
6224 hibernate_lookup_paddr(unsigned int indx)
6225 {
6226 struct ppnum_mapping *ppnm = NULL;
6227
6228 ppnm = ppnm_last_found;
6229
6230 if (ppnm) {
6231 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx)
6232 goto done;
6233 }
6234 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) {
6235
6236 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) {
6237 ppnm_last_found = ppnm;
6238 break;
6239 }
6240 }
6241 if (ppnm == NULL)
6242 panic("hibernate_lookup_paddr of %d failed\n", indx);
6243 done:
6244 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx));
6245 }
6246
6247
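/*
 * Mark every page fully contained in the kernel virtual range [saddr, eaddr)
 * as not needing to be saved, in both bitmaps, and return the number of pages
 * so marked.
 */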
6248 uint32_t
6249 hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6250 {
6251 addr64_t saddr_aligned;
6252 addr64_t eaddr_aligned;
6253 addr64_t addr;
6254 ppnum_t paddr;
6255 unsigned int mark_as_unneeded_pages = 0;
6256
6257 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64;
6258 eaddr_aligned = eaddr & ~PAGE_MASK_64;
6259
6260 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) {
6261
6262 paddr = pmap_find_phys(kernel_pmap, addr);
6263
6264 assert(paddr);
6265
6266 hibernate_page_bitset(page_list, TRUE, paddr);
6267 hibernate_page_bitset(page_list_wired, TRUE, paddr);
6268
6269 mark_as_unneeded_pages++;
6270 }
6271 return (mark_as_unneeded_pages);
6272 }
6273
6274
6275 void
6276 hibernate_hash_insert_page(vm_page_t mem)
6277 {
6278 vm_page_bucket_t *bucket;
6279 int hash_id;
6280
6281 assert(mem->hashed);
6282 assert(mem->object);
6283 assert(mem->offset != (vm_object_offset_t) -1);
6284
6285 /*
6286 * Insert it into the object/offset hash table
6287 */
6288 hash_id = vm_page_hash(mem->object, mem->offset);
6289 bucket = &vm_page_buckets[hash_id];
6290
6291 mem->next_m = bucket->page_list;
6292 bucket->page_list = VM_PAGE_PACK_PTR(mem);
6293 }
6294
6295
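/*
 * Reinitialize vm_pages[sindx..eindx) as free pages and return them to the
 * per-color free queues.
 */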
6296 void
6297 hibernate_free_range(int sindx, int eindx)
6298 {
6299 vm_page_t mem;
6300 unsigned int color;
6301
6302 while (sindx < eindx) {
6303 mem = &vm_pages[sindx];
6304
6305 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE);
6306
6307 mem->lopage = FALSE;
6308 mem->free = TRUE;
6309
6310 color = mem->phys_page & vm_color_mask;
6311 queue_enter_first(&vm_page_queue_free[color],
6312 mem,
6313 vm_page_t,
6314 pageq);
6315 vm_page_free_count++;
6316
6317 sindx++;
6318 }
6319 }
6320
6321
6322 extern void hibernate_rebuild_pmap_structs(void);
6323
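/*
 * Undo the compaction performed by hibernate_teardown_vm_structs(): move each
 * relocated vm_page_t back to its original slot, re-create the free ranges in
 * between, and rebuild the vm_page hash buckets.
 */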
6324 void
6325 hibernate_rebuild_vm_structs(void)
6326 {
6327 int cindx, sindx, eindx;
6328 vm_page_t mem, tmem, mem_next;
6329 AbsoluteTime startTime, endTime;
6330 uint64_t nsec;
6331
6332 if (hibernate_rebuild_needed == FALSE)
6333 return;
6334
6335 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0);
6336 HIBLOG("hibernate_rebuild started\n");
6337
6338 clock_get_uptime(&startTime);
6339
6340 hibernate_rebuild_pmap_structs();
6341
6342 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t));
6343 eindx = vm_pages_count;
6344
6345 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {
6346
6347 mem = &vm_pages[cindx];
6348 /*
6349 * hibernate_teardown_vm_structs leaves the address where
6350 * this vm_page_t belongs in "next_m".
6351 */
6352 tmem = VM_PAGE_UNPACK_PTR(mem->next_m);
6353 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6354
6355 sindx = (int)(tmem - &vm_pages[0]);
6356
6357 if (mem != tmem) {
6358 /*
6359 * this vm_page_t was moved by hibernate_teardown_vm_structs,
6360 * so move it back to its real location
6361 */
6362 *tmem = *mem;
6363 mem = tmem;
6364 }
6365 if (mem->hashed)
6366 hibernate_hash_insert_page(mem);
6367 /*
6368 * the 'hole' between this vm_page_t and the previous
6369 * vm_page_t we moved needs to be initialized as
6370 * a range of free vm_page_t's
6371 */
6372 hibernate_free_range(sindx + 1, eindx);
6373
6374 eindx = sindx;
6375 }
6376 if (sindx)
6377 hibernate_free_range(0, sindx);
6378
6379 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count);
6380
6381 /*
6382 * process the list of vm_page_t's that were entered in the hash,
6383 * but were not located in the vm_pages array... these are
6384 * vm_page_t's that were created on the fly (i.e. fictitious)
6385 */
6386 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) {
6387 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6388
6389 mem->next_m = VM_PAGE_PACK_PTR(NULL);
6390 hibernate_hash_insert_page(mem);
6391 }
6392 hibernate_rebuild_hash_list = NULL;
6393
6394 clock_get_uptime(&endTime);
6395 SUB_ABSOLUTETIME(&endTime, &startTime);
6396 absolutetime_to_nanoseconds(endTime, &nsec);
6397
6398 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL);
6399
6400 hibernate_rebuild_needed = FALSE;
6401
6402 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0);
6403 }
6404
6405
6406 extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
6407
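/*
 * Compact vm_pages[] by copying in-use vm_page_t's down into slots occupied
 * by free pages, so that the tail of the array, the page hash buckets and any
 * unneeded pmap ranges can be excluded from the hibernation image; returns
 * the number of pages marked as unneeded.
 */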
6408 uint32_t
6409 hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired)
6410 {
6411 unsigned int i;
6412 unsigned int compact_target_indx;
6413 vm_page_t mem, mem_next;
6414 vm_page_bucket_t *bucket;
6415 unsigned int mark_as_unneeded_pages = 0;
6416 unsigned int unneeded_vm_page_bucket_pages = 0;
6417 unsigned int unneeded_vm_pages_pages = 0;
6418 unsigned int unneeded_pmap_pages = 0;
6419 addr64_t start_of_unneeded = 0;
6420 addr64_t end_of_unneeded = 0;
6421
6422
6423 if (hibernate_should_abort())
6424 return (0);
6425
6426 HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
6427 vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
6428 vm_page_cleaned_count, compressor_object->resident_page_count);
6429
6430 for (i = 0; i < vm_page_bucket_count; i++) {
6431
6432 bucket = &vm_page_buckets[i];
6433
6434 for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
6435 assert(mem->hashed);
6436
6437 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);
6438
6439 if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
6440 mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
6441 hibernate_rebuild_hash_list = mem;
6442 }
6443 }
6444 }
6445 unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
6446 mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;
6447
6448 hibernate_teardown_vm_page_free_count = vm_page_free_count;
6449
6450 compact_target_indx = 0;
6451
6452 for (i = 0; i < vm_pages_count; i++) {
6453
6454 mem = &vm_pages[i];
6455
6456 if (mem->free) {
6457 unsigned int color;
6458
6459 assert(mem->busy);
6460 assert(!mem->lopage);
6461
6462 color = mem->phys_page & vm_color_mask;
6463
6464 queue_remove(&vm_page_queue_free[color],
6465 mem,
6466 vm_page_t,
6467 pageq);
6468 mem->pageq.next = NULL;
6469 mem->pageq.prev = NULL;
6470
6471 vm_page_free_count--;
6472
6473 hibernate_teardown_found_free_pages++;
6474
6475 if ( !vm_pages[compact_target_indx].free)
6476 compact_target_indx = i;
6477 } else {
6478 /*
6479 * record this vm_page_t's original location;
6480 * we need this even if it doesn't get moved,
6481 * as an indicator to the rebuild function that
6482 * it doesn't have to be moved back
6483 */
6484 mem->next_m = VM_PAGE_PACK_PTR(mem);
6485
6486 if (vm_pages[compact_target_indx].free) {
6487 /*
6488 * we've got a hole to fill, so
6489 * move this vm_page_t to its new home
6490 */
6491 vm_pages[compact_target_indx] = *mem;
6492 mem->free = TRUE;
6493
6494 hibernate_teardown_last_valid_compact_indx = compact_target_indx;
6495 compact_target_indx++;
6496 } else
6497 hibernate_teardown_last_valid_compact_indx = i;
6498 }
6499 }
6500 unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
6501 (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
6502 mark_as_unneeded_pages += unneeded_vm_pages_pages;
6503
6504 hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);
6505
6506 if (start_of_unneeded) {
6507 unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
6508 mark_as_unneeded_pages += unneeded_pmap_pages;
6509 }
6510 HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);
6511
6512 hibernate_rebuild_needed = TRUE;
6513
6514 return (mark_as_unneeded_pages);
6515 }
6516
6517
6518 #endif /* HIBERNATION */
6519
6520 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
6521
6522 #include <mach_vm_debug.h>
6523 #if MACH_VM_DEBUG
6524
6525 #include <mach_debug/hash_info.h>
6526 #include <vm/vm_debug.h>
6527
6528 /*
6529 * Routine: vm_page_info
6530 * Purpose:
6531 * Return information about the global VP table.
6532 * Fills the buffer with as much information as possible
6533 * and returns the desired size of the buffer.
6534 * Conditions:
6535 * Nothing locked. The caller should provide
6536 * possibly-pageable memory.
6537 */
6538
6539 unsigned int
6540 vm_page_info(
6541 hash_info_bucket_t *info,
6542 unsigned int count)
6543 {
6544 unsigned int i;
6545 lck_spin_t *bucket_lock;
6546
6547 if (vm_page_bucket_count < count)
6548 count = vm_page_bucket_count;
6549
6550 for (i = 0; i < count; i++) {
6551 vm_page_bucket_t *bucket = &vm_page_buckets[i];
6552 unsigned int bucket_count = 0;
6553 vm_page_t m;
6554
6555 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6556 lck_spin_lock(bucket_lock);
6557
6558 for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m))
6559 bucket_count++;
6560
6561 lck_spin_unlock(bucket_lock);
6562
6563 /* don't touch pageable memory while holding locks */
6564 info[i].hib_count = bucket_count;
6565 }
6566
6567 return vm_page_bucket_count;
6568 }
6569 #endif /* MACH_VM_DEBUG */
6570
6571 #if VM_PAGE_BUCKETS_CHECK
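/*
 * Consistency check for the vm_page hash buckets: every page on a bucket's
 * list must be marked "hashed" and must hash to the bucket it is on.
 */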
6572 void
6573 vm_page_buckets_check(void)
6574 {
6575 unsigned int i;
6576 vm_page_t p;
6577 unsigned int p_hash;
6578 vm_page_bucket_t *bucket;
6579 lck_spin_t *bucket_lock;
6580
6581 if (!vm_page_buckets_check_ready) {
6582 return;
6583 }
6584
6585 #if HIBERNATION
6586 if (hibernate_rebuild_needed ||
6587 hibernate_rebuild_hash_list) {
6588 panic("BUCKET_CHECK: hibernation in progress: "
6589 "rebuild_needed=%d rebuild_hash_list=%p\n",
6590 hibernate_rebuild_needed,
6591 hibernate_rebuild_hash_list);
6592 }
6593 #endif /* HIBERNATION */
6594
6595 #if VM_PAGE_FAKE_BUCKETS
6596 char *cp;
6597 for (cp = (char *) vm_page_fake_buckets_start;
6598 cp < (char *) vm_page_fake_buckets_end;
6599 cp++) {
6600 if (*cp != 0x5a) {
6601 panic("BUCKET_CHECK: corruption at %p in fake buckets "
6602 "[0x%llx:0x%llx]\n",
6603 cp,
6604 (uint64_t) vm_page_fake_buckets_start,
6605 (uint64_t) vm_page_fake_buckets_end);
6606 }
6607 }
6608 #endif /* VM_PAGE_FAKE_BUCKETS */
6609
6610 for (i = 0; i < vm_page_bucket_count; i++) {
6611 bucket = &vm_page_buckets[i];
6612 if (!bucket->page_list) {
6613 continue;
6614 }
6615
6616 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK];
6617 lck_spin_lock(bucket_lock);
6618 p = VM_PAGE_UNPACK_PTR(bucket->page_list);
6619 while (p != VM_PAGE_NULL) {
6620 p_hash = vm_page_hash(p->object, p->offset);
6621 if (!p->hashed) {
6622 panic("BUCKET_CHECK: page %p (%p,0x%llx) "
6623 "hash %d in bucket %d at %p "
6624 "is not hashed\n",
6625 p, p->object, p->offset,
6626 p_hash, i, bucket);
6627 }
6628 if (p_hash != i) {
6629 panic("BUCKET_CHECK: corruption in bucket %d "
6630 "at %p: page %p object %p offset 0x%llx "
6631 "hash %d\n",
6632 i, bucket, p, p->object, p->offset,
6633 p_hash);
6634 }
6635 p = VM_PAGE_UNPACK_PTR(p->next_m);
6636 }
6637 lck_spin_unlock(bucket_lock);
6638 }
6639
6640 // printf("BUCKET_CHECK: checked buckets\n");
6641 }
6642 #endif /* VM_PAGE_BUCKETS_CHECK */